dbus/test/unused-code-gc.py

241 lines
7.7 KiB
Python
Raw Normal View History

#! /usr/bin/python
import os
import sys
import string
import re
## hash from symbol name to list of symbols with that name,
## where the list of symbols contains a list representing each symbol.
## Each symbol record is a mutable list laid out as:
##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
##   5 referenced-by, 6 backlinked, 7 approx size
symbols = {}
## hash used as a set of root symbol names (every value is 1); main()
## fills it from the `nm -D` exports plus the hardcoded vtable roots
roots = {}
def createBacklinks(name, syms):
    """Record *name* as a referencer of every symbol that *syms* refer to.

    name -- the symbol name shared by all records in *syms*
    syms -- list of symbol records for that name (see the layout comment
            on the global ``symbols`` table)

    For each name in a record's reference list (field 2), *name* is appended
    to the referenced-by list (field 5) of every record registered under the
    referenced name, unless it is already present.  Returns nothing; the
    records in the global ``symbols`` table are updated in place.
    """
    for s in syms:
        for r in s[2]:
            ## for each ref, add ourselves as a referencer; a ref whose name
            ## never showed up in the objdump has no records and is skipped
            for t in symbols.get(r, ()):
                if name not in t[5]:
                    t[5].append(name)
def _unmarked_references(syms):
    """Mark each not-yet-reached record in *syms* and yield its references.

    Yields ``(referrer_name, referenced_name)`` pairs.  Being a generator,
    the "already marked" test on each record only runs when the traversal
    actually gets to that record, exactly as a recursive walk would.
    """
    for s in syms:
        if s[4]:
            continue  ## already marked
        s[4] = 1
        for r in s[2]:
            yield (s[0], r)


def markSymbol(frm, name):
    """Mark *name* and everything reachable from it as reached.

    frm  -- name of the symbol (or the string "root") that refers to *name*;
            only used in diagnostics
    name -- name of the symbol to start marking from

    Sets the "reached" flag (field 4) on every record registered under
    *name* in the global ``symbols`` table and, depth first, on everything
    those records reference.  A name that is missing from the table is
    reported and skipped instead of raising KeyError.

    The walk keeps its own stack instead of recursing, so a long call chain
    cannot run into the interpreter's recursion limit; symbols are visited
    in the same order as the straightforward recursive walk.
    """
    pending = [iter([(frm, name)])]
    while pending:
        try:
            referrer, target = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue

        if target not in symbols:
            print("%s referenced but was not in the objdump" % target)
            continue

        syms = symbols[target]
        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and target[0] != '.':
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (target, referrer, target))
            print(syms)

        pending.append(_unmarked_references(syms))
def cmpFilename(a, b):
    """Three-way compare two (name, filename, ...) entries.

    Entries are ordered by filename (index 1) first and by symbol name
    (index 0) second.  Returns -1, 0 or 1 like the old ``cmp`` builtin,
    without depending on it (``cmp`` does not exist in Python 3).
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    return (key_a > key_b) - (key_a < key_b)
def sizeAsString(bytes):
    """Return a short human-readable rendering of a byte count.

    bytes -- the size as a number of bytes

    Values below 1024 are shown as whole bytes, values below 1024*1024 in
    units of 1024 bytes with a "K" suffix, and anything larger in units of
    1024*1024 bytes with an "M" suffix.
    """
    ## "%.1f" rather than "%.2g": the latter falls back to exponent
    ## notation from 100 upwards (150K came out as "1.5e+02K")
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        return "%.1fK" % (bytes / 1024.0)
    else:
        return "%.1fM" % (bytes / 1024.0 / 1024.0)
def printLost():
    """Print every symbol that was never marked as reached, grouped by file.

    Walks the global ``symbols`` table and reports each name whose first
    record has a clear "reached" flag (field 4), skipping names that start
    with '.'.  Symbols are listed per source file, sorted by filename and
    then by name, each with its approximate size and the names that
    reference it.  After the listing comes one summary line per file with a
    non-zero unused total, followed by the grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and name[0] != '.': ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## same ordering as cmpFilename: by filename, then by symbol name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referencers, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referencers:
            print(" referenced from %s" % trace)

    ## the loop above only flushes a file's total when the *next* file
    ## starts, so the last file has to be flushed here
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))
def _read_command_lines(command):
    """Run *command* through the shell and return its stdout as a list of lines.

    The command is echoed first.  The pipe is always closed, and a non-zero
    exit status is reported as a warning rather than silently ignored.
    """
    print("Running: %s" % command)
    pipe = os.popen(command)
    try:
        lines = pipe.readlines()
    finally:
        ## close() waits for the child; it returns None on success
        status = pipe.close()
    if status:
        print("Warning: '%s' exited with status %s" % (command, status))
    return lines


def _parse_objdump(lines):
    """Fill the global ``symbols`` table from ``objdump -D -l`` output lines.

    Creates one record per symbol header line and attaches to the most
    recent symbol the names it references and the source file it came from.
    Raises Exception if one symbol is attributed to two different files.
    """
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile ('([0-9a-f]+) <([^>]+)>:')
    ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile (' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile (r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile (r'([^+]+)\+[0-9a-fx]+')

    current_sym = None
    for l in lines:
        match = sym_re.match(l)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym is not None:
                ## a symbol runs up to the start of the next one, so the gap
                ## between the two addresses is the size of the *previous*
                ## symbol, not of the one that starts here
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if current_sym is None:
            ## nothing to attach references or filenames to yet
            continue

        match = ref_re.search(l)
        if match:
            target = match.group(1)
            match = funcname_re.match(target)
            if match:
                ## dump the "+address"
                target = match.group(1)
            if target != current_sym[0]: ## skip self-references
                current_sym[2].append(target)
            continue

        match = file_re.match(l)
        if match:
            path = match.group(1)
            if path.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != path:
                raise Exception ("%s in both %s and %s" % (current_sym[0], current_sym[3], path))
            else:
                current_sym[3] = path


def _collect_exported_roots(lines):
    """Add every 'T' symbol found in ``nm -D`` output lines to the global ``roots``.

    Raises Exception if the same name is listed twice.
    """
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile ('T ([^ \n]+)$')
    for l in lines:
        match = dynsym_re.search(l)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Report symbols of an object file that no exported symbol can reach.

    Takes the object file path from ``sys.argv[1]``, disassembles it with
    ``objdump`` to build the reference graph, reads the exported symbols
    with ``nm -D``, adds a hardcoded list of vtable functions as extra
    roots, marks everything reachable from the roots and prints the rest.

    Exits with status 1 and a usage message when no path is given.  Raises
    Exception when a symbol is exported twice, when a hardcoded vtable root
    is also an exported symbol, or when objdump attributes one symbol to
    two source files.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)
    filename = sys.argv[1]

    ## NOTE(review): filename is pasted into a shell command line unquoted,
    ## so a path with spaces or shell metacharacters will not work

    ## first we find which functions reference which other functions
    _parse_objdump(_read_command_lines("objdump -D --demangle -l %s" % filename))

    ## now we need to find the roots (exported symbols)
    _collect_exported_roots(_read_command_lines("nm -D %s" % filename))
    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]
    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""
The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).
""")
    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)
    printLost()
## run the analysis only when executed as a script, not when imported
if __name__ == "__main__":
    main()