subsetting: Support unicode fontnames

Most fonts use Windows platform-specific encoded font names since they
allow Unicode names.

- Make _cairo_truetype_read_font_name() read the Windows platform
  names first. If this fails, fall back to reading the Mac platform
  MacRoman encoded name.

- Use the PDF method of encoding non-ASCII PS font names. Poppler will
  correctly extract the Unicode name.

- Make PDF embed the font family name as AsciiHex if the name is not ASCII.
This commit is contained in:
Adrian Johnson 2011-09-15 21:52:26 +09:30
parent 1e67fb4490
commit 47e16d0e56
4 changed files with 252 additions and 68 deletions

View file

@ -2837,13 +2837,13 @@ _cairo_cff_subset_init (cairo_cff_subset_t *cff_subset,
}
if (font->font_name) {
cff_subset->font_name = strdup (font->font_name);
if (cff_subset->font_name == NULL) {
cff_subset->family_name_utf8 = strdup (font->font_name);
if (cff_subset->family_name_utf8 == NULL) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail2;
}
} else {
cff_subset->font_name = NULL;
cff_subset->family_name_utf8 = NULL;
}
cff_subset->widths = calloc (sizeof (double), font->scaled_font_subset->num_glyphs);
@ -2877,7 +2877,7 @@ _cairo_cff_subset_init (cairo_cff_subset_t *cff_subset,
fail4:
free (cff_subset->widths);
fail3:
free (cff_subset->font_name);
free (cff_subset->family_name_utf8);
fail2:
free (cff_subset->ps_name);
fail1:
@ -2890,7 +2890,7 @@ void
_cairo_cff_subset_fini (cairo_cff_subset_t *subset)
{
free (subset->ps_name);
free (subset->font_name);
free (subset->family_name_utf8);
free (subset->widths);
free (subset->data);
}
@ -3249,7 +3249,7 @@ _cairo_cff_fallback_init (cairo_cff_subset_t *cff_subset,
if (unlikely (status))
goto fail2;
cff_subset->font_name = NULL;
cff_subset->family_name_utf8 = NULL;
cff_subset->ps_name = strdup (font->ps_name);
if (unlikely (cff_subset->ps_name == NULL)) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);

View file

@ -3959,6 +3959,61 @@ _cairo_pdf_surface_write_pages (cairo_pdf_surface_t *surface)
"endobj\n");
}
/* Build a PDF string object from a NUL-terminated UTF-8 string.
 *
 * If every byte of @utf8 is in the range 32..126 and is none of '(',
 * ')' or '\\', the result is the text wrapped in parentheses, e.g.
 * "(Arial)". Otherwise the text is converted with
 * _cairo_utf8_to_utf16() and written as a hex string that starts with
 * the FEFF byte order mark, e.g. "<FEFF00C5>".
 *
 * On success *@str_out receives a malloc()ed, NUL-terminated string
 * that the caller must free(). On failure *@str_out is left untouched
 * and the error status is returned (CAIRO_STATUS_NO_MEMORY for a
 * failed allocation, or whatever _cairo_utf8_to_utf16() reported).
 */
static cairo_status_t
_utf8_to_pdf_string (const char *utf8, char **str_out)
{
    int length = strlen (utf8);
    cairo_bool_t needs_hex = FALSE;
    char *result;
    int n;

    /* Any control byte, non-ASCII byte or string delimiter/escape
     * character forces the hex form. */
    for (n = 0; n < length && !needs_hex; n++) {
        unsigned ch = utf8[n];

        if (ch < 32 || ch > 126) {
            needs_hex = TRUE;
        } else if (ch == '(' || ch == ')' || ch == '\\') {
            needs_hex = TRUE;
        }
    }

    if (!needs_hex) {
        /* '(' + text + ')' + NUL */
        result = malloc (length + 3);
        if (result == NULL) {
            return _cairo_error (CAIRO_STATUS_NO_MEMORY);
        }

        result[0] = '(';
        memcpy (result + 1, utf8, length);
        result[length + 1] = ')';
        result[length + 2] = 0;
    } else {
        uint16_t *units = NULL;
        int num_units = 0;
        char *cursor;
        cairo_status_t status;

        status = _cairo_utf8_to_utf16 (utf8, -1, &units, &num_units);
        if (unlikely (status)) {
            return status;
        }

        /* "<FEFF" (5) + 4 hex digits per unit + ">" (1) + NUL (1) */
        result = malloc (num_units*4 + 7);
        if (result == NULL) {
            free (units);
            return _cairo_error (CAIRO_STATUS_NO_MEMORY);
        }

        memcpy (result, "<FEFF", 5);
        cursor = result + 5;
        for (n = 0; n < num_units; n++) {
            /* size 5: four hex digits plus the terminating NUL, which
             * the next iteration (or the '>' below) overwrites. */
            snprintf (cursor, 5, "%04X", units[n]);
            cursor += 4;
        }
        cursor[0] = '>';
        cursor[1] = 0;

        free (units);
    }

    *str_out = result;
    return CAIRO_STATUS_SUCCESS;
}
static cairo_status_t
_cairo_pdf_surface_emit_unicode_for_glyph (cairo_pdf_surface_t *surface,
const char *utf8)
@ -4243,10 +4298,17 @@ _cairo_pdf_surface_emit_cff_font (cairo_pdf_surface_t *surface,
tag,
subset->ps_name);
if (subset->font_name) {
if (subset->family_name_utf8) {
char *pdf_str;
status = _utf8_to_pdf_string (subset->family_name_utf8, &pdf_str);
if (unlikely (status))
return status;
_cairo_output_stream_printf (surface->output,
" /FontFamily (%s)\n",
subset->font_name);
" /FontFamily %s\n",
pdf_str);
free (pdf_str);
}
_cairo_output_stream_printf (surface->output,
@ -4681,10 +4743,17 @@ _cairo_pdf_surface_emit_truetype_font_subset (cairo_pdf_surface_t *surface,
tag,
subset.ps_name);
if (subset.font_name) {
if (subset.family_name_utf8) {
char *pdf_str;
status = _utf8_to_pdf_string (subset.family_name_utf8, &pdf_str);
if (unlikely (status))
return status;
_cairo_output_stream_printf (surface->output,
" /FontFamily (%s)\n",
subset.font_name);
" /FontFamily %s\n",
pdf_str);
free (pdf_str);
}
_cairo_output_stream_printf (surface->output,

View file

@ -345,7 +345,7 @@ cairo_private cairo_int_status_t
_cairo_scaled_font_subset_create_glyph_names (cairo_scaled_font_subset_t *subset);
typedef struct _cairo_cff_subset {
char *font_name;
char *family_name_utf8;
char *ps_name;
double *widths;
double x_min, y_min, x_max, y_max;
@ -427,7 +427,7 @@ cairo_private void
_cairo_cff_fallback_fini (cairo_cff_subset_t *cff_subset);
typedef struct _cairo_truetype_subset {
char *font_name;
char *family_name_utf8;
char *ps_name;
double *widths;
double x_min, y_min, x_max, y_max;

View file

@ -1137,13 +1137,13 @@ cairo_truetype_subset_init_internal (cairo_truetype_subset_t *truetype_subse
}
if (font->base.font_name != NULL) {
truetype_subset->font_name = strdup (font->base.font_name);
if (unlikely (truetype_subset->font_name == NULL)) {
truetype_subset->family_name_utf8 = strdup (font->base.font_name);
if (unlikely (truetype_subset->family_name_utf8 == NULL)) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail2;
}
} else {
truetype_subset->font_name = NULL;
truetype_subset->family_name_utf8 = NULL;
}
/* The widths array returned must contain only widths for the
@ -1201,7 +1201,7 @@ cairo_truetype_subset_init_internal (cairo_truetype_subset_t *truetype_subse
fail4:
free (truetype_subset->widths);
fail3:
free (truetype_subset->font_name);
free (truetype_subset->family_name_utf8);
fail2:
free (truetype_subset->ps_name);
fail1:
@ -1228,7 +1228,7 @@ void
_cairo_truetype_subset_fini (cairo_truetype_subset_t *subset)
{
free (subset->ps_name);
free (subset->font_name);
free (subset->family_name_utf8);
free (subset->widths);
free (subset->data);
free (subset->string_offsets);
@ -1395,6 +1395,107 @@ cleanup:
return status;
}
/* Look up a string in a TrueType 'name' table and return it as a newly
 * allocated, NUL-terminated string.
 *
 * The first record whose name id, platform id and encoding id equal
 * @name_id, @platform and @encoding is used. @language must also match
 * the record's language id unless it is -1, in which case any language
 * is accepted.
 *
 * Post-processing applied to the copied string:
 *  - @platform == 3: the bytes are read as big-endian 16-bit units and
 *    each unit is converted with _cairo_ucs4_to_utf8().
 *  - @platform == 1: every byte above 127 is replaced with '_'.
 *  - A leading PDF subset tag (six characters 'A'..'Z' followed by
 *    '+') is removed when the string is longer than seven bytes.
 *
 * Returns CAIRO_STATUS_SUCCESS with *@str_out set to the string (caller
 * frees), or to NULL when no record matches. Returns
 * CAIRO_STATUS_NO_MEMORY if an allocation fails; *@str_out is then left
 * untouched.
 *
 * NOTE(review): the record offset/length are not checked against the
 * size of the loaded table, and 16-bit units are converted one by one
 * (no surrogate pair combining) -- confirm whether callers rely on the
 * table having been validated elsewhere.
 */
static cairo_status_t
find_name (tt_name_t *name, int name_id, int platform, int encoding, int language, char **str_out)
{
    tt_name_record_t *rec;
    cairo_status_t status;
    char *text = NULL;
    char *stripped;
    int text_len = 0;
    int n, k;

    /* Copy the string of the first matching name record. */
    for (n = 0; n < be16_to_cpu (name->num_records); n++) {
        rec = &name->records[n];

        if (be16_to_cpu (rec->name) != name_id) {
            continue;
        }
        if (be16_to_cpu (rec->platform) != platform) {
            continue;
        }
        if (be16_to_cpu (rec->encoding) != encoding) {
            continue;
        }
        if (language != -1 && be16_to_cpu (rec->language) != language) {
            continue;
        }

        text = malloc (be16_to_cpu (rec->length) + 1);
        if (text == NULL) {
            return _cairo_error (CAIRO_STATUS_NO_MEMORY);
        }

        text_len = be16_to_cpu (rec->length);
        memcpy (text,
                ((char *) name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (rec->offset),
                text_len);
        text[text_len] = 0;
        break;
    }

    /* No such record: not an error, just report "no name". */
    if (text == NULL) {
        *str_out = NULL;
        return CAIRO_STATUS_SUCCESS;
    }

    if (platform == 3) {
        /* Win platform, unicode encoding: convert to utf8. */
        uint16_t *units = (uint16_t *) text;
        int num_units = text_len/2;
        int utf8_size = 0;
        char *utf8;
        char *out;

        /* First pass measures, second pass writes. */
        for (n = 0; n < num_units; n++) {
            utf8_size += _cairo_ucs4_to_utf8 (be16_to_cpu (units[n]), NULL);
        }

        utf8 = malloc (utf8_size + 1);
        if (utf8 == NULL) {
            goto out_of_memory;
        }

        out = utf8;
        for (n = 0; n < num_units; n++) {
            out += _cairo_ucs4_to_utf8 (be16_to_cpu (units[n]), out);
        }
        *out = 0;

        free (text);
        text = utf8;
    } else if (platform == 1) {
        /* Mac platform, Mac Roman encoding: bytes above 127 are
         * replaced with underscores instead of being mapped to
         * unicode, since this path is only a rarely used fallback. */
        for (n = 0; n < text_len; n++) {
            if ((unsigned char) text[n] & 0x80) {
                text[n] = '_';
            }
        }
    }

    /* If the name is prefixed with a PDF subset tag ("ABCDEF+"),
     * strip it off. */
    text_len = strlen (text);
    if (text_len > 7 && text[6] == '+') {
        for (k = 0; k < 6; k++) {
            if (!(text[k] >= 'A' && text[k] <= 'Z')) {
                break;
            }
        }

        if (k == 6) {
            /* (text_len - 7) remaining bytes plus the NUL. */
            stripped = malloc (text_len - 6);
            if (unlikely (stripped == NULL)) {
                goto out_of_memory;
            }

            memcpy (stripped, text + 7, text_len - 7);
            stripped[text_len - 7] = 0;
            free (text);
            text = stripped;
        }
    }

    *str_out = text;
    return CAIRO_STATUS_SUCCESS;

out_of_memory:
    status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
    free (text);
    return status;
}
cairo_int_status_t
_cairo_truetype_read_font_name (cairo_scaled_font_t *scaled_font,
char **ps_name_out,
@ -1403,11 +1504,9 @@ _cairo_truetype_read_font_name (cairo_scaled_font_t *scaled_font,
cairo_status_t status;
const cairo_scaled_font_backend_t *backend;
tt_name_t *name;
tt_name_record_t *record;
unsigned long size;
int i, j;
char *ps_name = NULL;
char *font_name = NULL;
char *family_name = NULL;
backend = scaled_font->backend;
if (!backend->load_truetype_table)
@ -1425,76 +1524,92 @@ _cairo_truetype_read_font_name (cairo_scaled_font_t *scaled_font,
if (name == NULL)
return _cairo_error (CAIRO_STATUS_NO_MEMORY);
status = backend->load_truetype_table (scaled_font,
status = backend->load_truetype_table (scaled_font,
TT_TAG_name, 0,
(unsigned char *) name,
&size);
if (status)
goto fail;
/* Extract the font name and PS name from the name table. At
* present this just looks for the Mac platform/Roman encoded font
* name. It should be extended to use any suitable font name in
* the name table.
*/
for (i = 0; i < be16_to_cpu(name->num_records); i++) {
record = &(name->records[i]);
if ((be16_to_cpu (record->platform) == 1) &&
(be16_to_cpu (record->encoding) == 0)) {
/* Find PS Name (name_id = 6). OT spec says PS name must be one of
* the following two encodings */
status = find_name (name, 6, 3, 1, 0x409, &ps_name); /* win, unicode, english-us */
if (unlikely(status))
goto fail;
if (be16_to_cpu (record->name) == 4) {
font_name = malloc (be16_to_cpu(record->length) + 1);
if (font_name == NULL) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail;
}
strncpy(font_name,
((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
be16_to_cpu (record->length));
font_name[be16_to_cpu (record->length)] = 0;
}
if (!ps_name) {
status = find_name (name, 6, 1, 0, 0, &ps_name); /* mac, roman, english */
if (unlikely(status))
goto fail;
}
if (be16_to_cpu (record->name) == 6) {
ps_name = malloc (be16_to_cpu(record->length) + 1);
if (ps_name == NULL) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail;
}
strncpy(ps_name,
((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
be16_to_cpu (record->length));
ps_name[be16_to_cpu (record->length)] = 0;
}
/* Find Family name (name_id = 1) */
status = find_name (name, 1, 3, 1, 0x409, &family_name); /* win, unicode, english-us */
if (unlikely(status))
goto fail;
if (font_name && ps_name)
break;
}
if (!family_name) {
status = find_name (name, 1, 3, 0, 0x409, &family_name); /* win, symbol, english-us */
if (unlikely(status))
goto fail;
}
if (!family_name) {
status = find_name (name, 1, 1, 0, 0, &family_name); /* mac, roman, english */
if (unlikely(status))
goto fail;
}
if (!family_name) {
status = find_name (name, 1, 3, 1, -1, &family_name); /* win, unicode, any language */
if (unlikely(status))
goto fail;
}
free (name);
/* Ensure PS name does not contain any spaces */
/* Ensure PS name is a valid PDF/PS name object. In PDF names are
* treated as UTF8 and non ASCII bytes, ' ', and '#' are encoded
* as '#' followed by 2 hex digits that encode the byte. By also
* encoding the characters in the reserved string we ensure the
* name is also PS compatible. */
if (ps_name) {
for (i = 0, j = 0; ps_name[j]; j++) {
if (ps_name[j] == ' ')
continue;
ps_name[i++] = ps_name[j];
static const char *reserved = "()<>[]{}/%#\\";
char buf[128]; /* max name length is 127 bytes */
char *src = ps_name;
char *dst = buf;
while (*src && dst < buf + 127) {
unsigned char c = *src;
if (c < 0x21 || c > 0x7e || strchr (reserved, c)) {
if (dst + 4 > buf + 127)
break;
snprintf (dst, 4, "#%02X", c);
src++;
dst += 3;
} else {
*dst++ = *src++;
}
}
*dst = 0;
free (ps_name);
ps_name = strdup (buf);
if (ps_name == NULL) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail;
}
ps_name[i] = '\0';
}
*ps_name_out = ps_name;
*font_name_out = font_name;
*font_name_out = family_name;
return CAIRO_STATUS_SUCCESS;
fail:
free (name);
free (ps_name);
free (font_name);
free (family_name);
*ps_name_out = NULL;
*font_name_out = NULL;