mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
intel/perf: Store indices to strings rather than pointers
The compiler does a good job of deduplicating strings already, but we can eliminate the pointers to each string by combining the strings into a single char array and storing only an index into that array.

The longest of the char arrays is the descriptions array, which is a little over 45 KiB, so still under MSVC's 64 KiB string literal limit [0]. Because the string length is under 64 KiB we can use uint16_t as the index type, which roughly doubles our savings as compared to an int.

This cuts 77 KiB from iris_dri.so (0.5%) and libvulkan_intel.so (0.9%).

    text     data     bss      dec     hex  filename
  926811    25920       0   952731   e899b  meson-generated_.._intel_perf_metrics.c.o (before)
  924401        0       0   924401   e1af1  meson-generated_.._intel_perf_metrics.c.o (after)

    text     data     bss      dec     hex  filename
14190852   391628  210004 14792484  e1b724  iris_dri.so (before)
14137732   365708  210004 14713444  e08264  iris_dri.so (after)

    text     data     bss      dec     hex  filename
 8184097   240184   22820  8447101  80e47d  libvulkan_intel.so (before)
 8131009   214264   22820  8368093  7fafdd  libvulkan_intel.so (after)

relinfo:
iris_dri.so (before): 17765 relocations, 17545 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
iris_dri.so (after) : 15605 relocations, 15385 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (before): 10720 relocations, 6989 relative (65%), 355 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (after) : 8560 relocations, 4829 relative (56%), 355 PLT entries, 1 for local syms (0%), 0 users

[0] https://docs.microsoft.com/en-us/cpp/cpp/string-and-character-literals-cpp?view=msvc-170&viewFallbackFrom=vs-2019

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15237>
This commit is contained in:
parent
df5e743c80
commit
6c0246dcf4
2 changed files with 68 additions and 14 deletions
|
|
@ -20,6 +20,7 @@
|
|||
# IN THE SOFTWARE.
|
||||
|
||||
import argparse
|
||||
import builtins
|
||||
import collections
|
||||
import os
|
||||
import sys
|
||||
|
|
@ -414,7 +415,9 @@ def counter_key(counter):
|
|||
return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields])
|
||||
|
||||
|
||||
def output_counter_struct(set, counter, idx):
|
||||
def output_counter_struct(set, counter, idx,
|
||||
name_to_idx, desc_to_idx,
|
||||
symbol_name_to_idx, category_to_idx):
|
||||
data_type = counter.data_type
|
||||
data_type_uc = data_type.upper()
|
||||
|
||||
|
|
@ -426,10 +429,10 @@ def output_counter_struct(set, counter, idx):
|
|||
|
||||
c("[" + str(idx) + "] = {\n")
|
||||
c_indent(3)
|
||||
c(".name = \"" + counter.name + "\",\n")
|
||||
c(".desc = \"" + counter.description + " " + desc_units(counter.units) + "\",\n")
|
||||
c(".symbol_name = \"" + counter.symbol_name + "\",\n")
|
||||
c(".category = \"" + counter.mdapi_group + "\",\n")
|
||||
c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n")
|
||||
c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n")
|
||||
c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n")
|
||||
c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n")
|
||||
c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n")
|
||||
c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n")
|
||||
c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n")
|
||||
|
|
@ -476,6 +479,29 @@ def output_counter_report(set, counter, counter_to_idx, current_offset):
|
|||
return current_offset + sizeof(c_type)
|
||||
|
||||
|
||||
def str_to_idx_table(strs):
    """Map each distinct string to its offset in a packed string table.

    The strings are sorted and laid out back to back, each one followed by
    a single terminator character, so the offset of an entry is the sum of
    ``len(s) + 1`` over every distinct string that sorts before it.

    Args:
        strs: Iterable of strings.  Duplicates are collapsed so that every
            string owns exactly one slot in the table.

    Returns:
        A ``collections.OrderedDict`` mapping string -> offset, iterating
        in sorted (i.e. increasing offset) order.  It is empty when
        ``strs`` is empty.
    """
    str_to_idx = collections.OrderedDict()
    offset = 0

    for s in sorted(strs):
        # Sorting places duplicates next to each other; skipping them keeps
        # the offsets in step with a table that stores each key only once.
        if s in str_to_idx:
            continue

        str_to_idx[s] = offset

        # +1 accounts for the terminator that follows every entry.
        # NOTE(review): len() counts Python characters, so the offsets
        # presumably assume one byte per character in the generated
        # output -- confirm if non-ASCII or escaped text can appear.
        offset += len(s) + 1

    return str_to_idx
|
||||
|
||||
|
||||
def output_str_table(name: str, str_to_idx):
    """Emit a ``static const char <name>[]`` definition for a string table.

    Every key of ``str_to_idx`` becomes one quoted literal ending in an
    explicit ``\\0`` escape, preceded by a comment carrying the value it
    maps to.  Entries are written in the mapping's iteration order.

    Args:
        name: Identifier used for the emitted array.
        str_to_idx: Mapping of string -> index, iterated via ``.items()``.
    """
    # Render all entries up front so the emission sequence below reads as
    # a plain header / body / footer.
    entries = [
        f"/* {offset} */ \"{text}\\0\""
        for text, offset in str_to_idx.items()
    ]

    c("\n")
    c("static const char " + name + "[] = {\n")

    c_indent(3)
    c("\n".join(entries))
    c_outdent(3)

    c("};\n")
|
||||
|
||||
|
||||
register_types = {
|
||||
'FLEX': 'flex_regs',
|
||||
'NOA': 'mux_regs',
|
||||
|
|
@ -728,6 +754,30 @@ def main():
|
|||
#include "perf/intel_perf_setup.h"
|
||||
"""))
|
||||
|
||||
names = builtins.set()
|
||||
descs = builtins.set()
|
||||
symbol_names = builtins.set()
|
||||
categories = builtins.set()
|
||||
for gen in gens:
|
||||
for set in gen.sets:
|
||||
for counter in set.counters:
|
||||
names.add(counter.get('name'))
|
||||
symbol_names.add(counter.get('symbol_name'))
|
||||
descs.add(counter.get('description') + " " + desc_units(counter.get('units')))
|
||||
categories.add(counter.get('mdapi_group'))
|
||||
|
||||
name_to_idx = str_to_idx_table(names)
|
||||
output_str_table("name", name_to_idx)
|
||||
|
||||
desc_to_idx = str_to_idx_table(descs)
|
||||
output_str_table("desc", desc_to_idx)
|
||||
|
||||
symbol_name_to_idx = str_to_idx_table(symbol_names)
|
||||
output_str_table("symbol_name", symbol_name_to_idx)
|
||||
|
||||
category_to_idx = str_to_idx_table(categories)
|
||||
output_str_table("category", category_to_idx)
|
||||
|
||||
# Print out all equation functions.
|
||||
for gen in gens:
|
||||
for set in gen.sets:
|
||||
|
|
@ -747,7 +797,11 @@ def main():
|
|||
key = counter_key(counter)
|
||||
if key not in counter_to_idx:
|
||||
counter_to_idx[key] = idx
|
||||
output_counter_struct(set, key, idx)
|
||||
output_counter_struct(set, key, idx,
|
||||
name_to_idx,
|
||||
desc_to_idx,
|
||||
symbol_name_to_idx,
|
||||
category_to_idx)
|
||||
idx += 1
|
||||
|
||||
c_outdent(3)
|
||||
|
|
@ -764,10 +818,10 @@ def main():
|
|||
{
|
||||
const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
|
||||
|
||||
dest->name = counter->name;
|
||||
dest->desc = counter->desc;
|
||||
dest->symbol_name = counter->symbol_name;
|
||||
dest->category = counter->category;
|
||||
dest->name = &name[counter->name_idx];
|
||||
dest->desc = &desc[counter->desc_idx];
|
||||
dest->symbol_name = &symbol_name[counter->symbol_name_idx];
|
||||
dest->category = &category[counter->category_idx];
|
||||
dest->raw_max = raw_max;
|
||||
|
||||
dest->offset = offset;
|
||||
|
|
|
|||
|
|
@ -73,10 +73,10 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
|
|||
}
|
||||
|
||||
struct intel_perf_query_counter_data {
|
||||
const char *name;
|
||||
const char *desc;
|
||||
const char *symbol_name;
|
||||
const char *category;
|
||||
uint16_t name_idx;
|
||||
uint16_t desc_idx;
|
||||
uint16_t symbol_name_idx;
|
||||
uint16_t category_idx;
|
||||
enum intel_perf_counter_type type;
|
||||
enum intel_perf_counter_data_type data_type;
|
||||
enum intel_perf_counter_units units;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue