mesa/src/util/xmlpool/gen_xmlpool.py
Mathieu Bridon bd27203f4d python: Rework bytes/unicode string handling
In both Python 2 and 3, opening a file without specifying the mode will
open it for reading in text mode ('r').

On Python 2, the read() method of a file object opened in mode 'r' will
return byte strings, while on Python 3 it will return unicode strings.

Explicitly specifying the binary mode ('rb') then decoding the byte
string means we always handle unicode strings on both Python 2 and 3.

Which in turns means all re.match(line) will return unicode strings as
well.

If we also make expandCString return unicode strings, we don't need the
call to the unicode() constructor any more.

We were using the ugettext() method because it always returns unicode
strings in Python 2, contrarily to the gettext() one which returns
byte strings. The ugettext() method doesn't exist on Python 3, so we
must use the right method on each version of Python.

The last hurdles are that Python 3 doesn't let us concatenate unicode
and byte strings directly, and that Python 2's stdout wants encoded byte
strings while Python 3's want unicode strings.

With these changes, the script gives the same output on both Python 2
and 3.

Signed-off-by: Mathieu Bridon <bochecha@daitauha.fr>
Reviewed-by: Dylan Baker <dylan@pnwbakers.com>
2018-08-10 15:14:48 -07:00

227 lines
7.5 KiB
Python

#
# Usage:
# gen_xmlpool.py /path/to/t_option.h localedir lang lang lang ...
#
# For each given language, this script expects to find a .mo file at
# `{localedir}/{language}/LC_MESSAGES/options.mo`.
#
from __future__ import print_function
import sys
import gettext
import re
if sys.version_info < (3, 0):
gettext_method = 'ugettext'
else:
gettext_method = 'gettext'
# Path to t_options.h
template_header_path = sys.argv[1]
localedir = sys.argv[2]
# List of supported languages
languages = sys.argv[3:]
# Escape special characters in C strings
def escapeCString (s):
escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
'\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
# " -> '' is a hack. Quotes (") aren't possible in XML attributes.
# Better use Unicode characters for typographic quotes in option
# descriptions and translations.
i = 0
r = ''
while i < len(s):
# Special case: escape double quote with \u201c or \u201d, depending
# on whether it's an open or close quote. This is needed because plain
# double quotes are not possible in XML attributes.
if s[i] == '"':
if i == len(s)-1 or s[i+1].isspace():
# close quote
q = u'\u201c'
else:
# open quote
q = u'\u201d'
r = r + q
elif s[i] in escapeSeqs:
r = r + escapeSeqs[s[i]]
else:
r = r + s[i]
i = i + 1
return r
# Expand escape sequences in C strings (needed for gettext lookup)
def expandCString (s):
escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
'r' : '\r', 't' : '\t', 'v' : '\v',
'"' : '"', '\\' : '\\'}
i = 0
escape = False
hexa = False
octa = False
num = 0
digits = 0
r = u''
while i < len(s):
if not escape:
if s[i] == '\\':
escape = True
else:
r = r + s[i]
elif hexa:
if (s[i] >= '0' and s[i] <= '9') or \
(s[i] >= 'a' and s[i] <= 'f') or \
(s[i] >= 'A' and s[i] <= 'F'):
num = num * 16 + int(s[i],16)
digits = digits + 1
else:
digits = 2
if digits >= 2:
hexa = False
escape = False
r = r + chr(num)
elif octa:
if s[i] >= '0' and s[i] <= '7':
num = num * 8 + int(s[i],8)
digits = digits + 1
else:
digits = 3
if digits >= 3:
octa = False
escape = False
r = r + chr(num)
else:
if s[i] in escapeSeqs:
r = r + escapeSeqs[s[i]]
escape = False
elif s[i] >= '0' and s[i] <= '7':
octa = True
num = int(s[i],8)
if num <= 3:
digits = 1
else:
digits = 2
elif s[i] == 'x' or s[i] == 'X':
hexa = True
num = 0
digits = 0
else:
r = r + s[i]
escape = False
i = i + 1
return r
# Expand matches. The first match is always a DESC or DESC_BEGIN match.
# Subsequent matches are ENUM matches.
#
# DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
# ENUM format: \1 \2=gettext(" \3=<text> \4=") \5
def expandMatches (matches, translations, end=None):
assert len(matches) > 0
nTranslations = len(translations)
i = 0
# Expand the description+enums for all translations
for lang,trans in translations:
i = i + 1
# Make sure that all but the last line of a simple description
# are extended with a backslash.
suffix = ''
if len(matches) == 1 and i < len(translations) and \
not matches[0].expand (r'\7').endswith('\\'):
suffix = ' \\'
text = escapeCString (getattr(trans, gettext_method) (expandCString (
matches[0].expand (r'\5'))))
text = (matches[0].expand (r'\1' + lang + r'\3"' + text + r'"\7') + suffix)
# In Python 2, stdout expects encoded byte strings, or else it will
# encode them with the ascii 'codec'
if sys.version_info.major == 2:
text = text.encode('utf-8')
print(text)
# Expand any subsequent enum lines
for match in matches[1:]:
text = escapeCString (getattr(trans, gettext_method) (expandCString (
match.expand (r'\3'))))
text = match.expand (r'\1"' + text + r'"\5')
# In Python 2, stdout expects encoded byte strings, or else it will
# encode them with the ascii 'codec'
if sys.version_info.major == 2:
text = text.encode('utf-8')
print(text)
# Expand description end
if end:
print(end, end='')
# Compile a list of translation classes to all supported languages.
# The first translation is always a NullTranslations.
translations = [("en", gettext.NullTranslations())]
for lang in languages:
try:
trans = gettext.translation ("options", localedir, [lang])
except IOError:
sys.stderr.write ("Warning: language '%s' not found.\n" % lang)
continue
translations.append ((lang, trans))
# Regular expressions:
reLibintl_h = re.compile (r'#\s*include\s*<libintl.h>')
reDESC = re.compile (r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reDESC_BEGIN = re.compile (r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reENUM = re.compile (r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reDESC_END = re.compile (r'\s*DRI_CONF_DESC_END')
# Print a header
print("/***********************************************************************\n" \
" *** THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT! ***\n" \
" ***********************************************************************/")
# Process the options template and generate options.h with all
# translations.
template = open (template_header_path, "rb")
descMatches = []
for line in template:
line = line.decode('utf-8')
if len(descMatches) > 0:
matchENUM = reENUM .match (line)
matchDESC_END = reDESC_END.match (line)
if matchENUM:
descMatches.append (matchENUM)
elif matchDESC_END:
expandMatches (descMatches, translations, line)
descMatches = []
else:
sys.stderr.write (
"Warning: unexpected line inside description dropped:\n%s\n" \
% line)
continue
if reLibintl_h.search (line):
# Ignore (comment out) #include <libintl.h>
print("/* %s * commented out by gen_xmlpool.py */" % line)
continue
matchDESC = reDESC .match (line)
matchDESC_BEGIN = reDESC_BEGIN.match (line)
if matchDESC:
assert len(descMatches) == 0
expandMatches ([matchDESC], translations)
elif matchDESC_BEGIN:
assert len(descMatches) == 0
descMatches = [matchDESC_BEGIN]
else:
print(line, end='')
template.close()
if len(descMatches) > 0:
sys.stderr.write ("Warning: unterminated description at end of file.\n")
expandMatches (descMatches, translations)