Make PostScript output text selectable

The glyph names used in the Type42 and Type1 fallback fonts are now of
the form "/uniXXXX", where XXXX is the four-digit hexadecimal Unicode
code point of each glyph. When the output is converted to PDF (e.g. using
ps2pdf), PDF viewers are now able to correctly extract the text.
This commit is contained in:
Adrian Johnson 2007-10-14 00:04:21 +09:30
parent e347a7a7c3
commit 5af1b2280b
5 changed files with 219 additions and 10 deletions

View file

@ -508,18 +508,32 @@ _cairo_ps_surface_emit_truetype_font_subset (cairo_ps_surface_t *surface,
/* FIXME: Figure out how subset->x_max etc maps to the /FontBBox */
for (i = 1; i < font_subset->num_glyphs; i++)
_cairo_output_stream_printf (surface->final_stream,
"Encoding %d /g%d put\n", i, i);
for (i = 1; i < font_subset->num_glyphs; i++) {
if (font_subset->glyph_names != NULL) {
_cairo_output_stream_printf (surface->final_stream,
"Encoding %d /%s put\n",
i, font_subset->glyph_names[i]);
} else {
_cairo_output_stream_printf (surface->final_stream,
"Encoding %d /g%d put\n", i, i);
}
}
_cairo_output_stream_printf (surface->final_stream,
"/CharStrings %d dict dup begin\n"
"/.notdef 0 def\n",
font_subset->num_glyphs);
for (i = 1; i < font_subset->num_glyphs; i++)
_cairo_output_stream_printf (surface->final_stream,
"/g%d %d def\n", i, i);
for (i = 1; i < font_subset->num_glyphs; i++) {
if (font_subset->glyph_names != NULL) {
_cairo_output_stream_printf (surface->final_stream,
"/%s %d def\n",
font_subset->glyph_names[i], i);
} else {
_cairo_output_stream_printf (surface->final_stream,
"/g%d %d def\n", i, i);
}
}
_cairo_output_stream_printf (surface->final_stream,
"end readonly def\n");
@ -742,7 +756,6 @@ _cairo_ps_surface_emit_type3_font_subset (cairo_ps_surface_t *surface,
return CAIRO_STATUS_SUCCESS;
}
static cairo_status_t
_cairo_ps_surface_emit_unscaled_font_subset (cairo_scaled_font_subset_t *font_subset,
void *closure)
@ -750,6 +763,11 @@ _cairo_ps_surface_emit_unscaled_font_subset (cairo_scaled_font_subset_t *font_su
cairo_ps_surface_t *surface = closure;
cairo_status_t status;
status = _cairo_scaled_font_subset_create_glyph_names (font_subset);
if (status && status != CAIRO_INT_STATUS_UNSUPPORTED)
return status;
#if CAIRO_HAS_FT_FONT
status = _cairo_ps_surface_emit_type1_font_subset (surface, font_subset);
if (status != CAIRO_INT_STATUS_UNSUPPORTED)
@ -774,6 +792,10 @@ _cairo_ps_surface_emit_scaled_font_subset (cairo_scaled_font_subset_t *font_subs
cairo_ps_surface_t *surface = closure;
cairo_status_t status;
status = _cairo_scaled_font_subset_create_glyph_names (font_subset);
if (status && status != CAIRO_INT_STATUS_UNSUPPORTED)
return status;
status = _cairo_ps_surface_emit_type3_font_subset (surface, font_subset);
if (status != CAIRO_INT_STATUS_UNSUPPORTED)
return status;

View file

@ -262,6 +262,21 @@ _cairo_scaled_font_subsets_foreach_unscaled (cairo_scaled_font_subsets_t
cairo_scaled_font_subset_callback_func_t font_subset_callback,
void *closure);
/**
* _cairo_scaled_font_subset_create_glyph_names:
* @subset: a #cairo_scaled_font_subset_t
*
* Create an array of strings containing the glyph name for each glyph
* in @subset. The array is stored in subset->glyph_names.
*
* Return value: CAIRO_STATUS_SUCCESS if successful,
* CAIRO_INT_STATUS_UNSUPPORTED if the font backend does not support
* mapping the glyph indices to unicode characters. Possible errors
* include CAIRO_STATUS_NO_MEMORY.
**/
cairo_private cairo_int_status_t
_cairo_scaled_font_subset_create_glyph_names (cairo_scaled_font_subset_t *subset);
typedef struct _cairo_cff_subset {
char *base_font;
int *widths;

View file

@ -37,6 +37,7 @@
* Carl D. Worth <cworth@cworth.org>
* Kristian Høgsberg <krh@redhat.com>
* Keith Packard <keithp@keithp.com>
* Adrian Johnson <ajohnson@redneon.com>
*/
#include "cairoint.h"
@ -100,6 +101,11 @@ typedef struct _cairo_sub_font_collection {
void *font_subset_callback_closure;
} cairo_sub_font_collection_t;
/* Hash table entry keyed by a C string. Used in this file to detect
 * glyph names that have already been assigned. */
typedef struct _cairo_string_entry {
/* Generic hash entry header; base.hash is filled in by
 * _cairo_string_init_key(). */
cairo_hash_entry_t base;
/* NUL-terminated key string. The entry only stores the pointer: the
 * cleanup code in this file frees entries without freeing the string. */
char *string;
} cairo_string_entry_t;
static void
_cairo_sub_font_glyph_init_key (cairo_sub_font_glyph_t *sub_font_glyph,
unsigned long scaled_font_glyph_index)
@ -402,6 +408,7 @@ _cairo_sub_font_collect (void *entry, void *closure)
subset.subset_id = i;
subset.glyphs = collection->glyphs;
subset.num_glyphs = collection->num_glyphs;
subset.glyph_names = NULL;
/* No need to check for out of memory here. If to_unicode is NULL, the PDF
* surface does not emit a ToUnicode stream */
subset.to_unicode = _cairo_malloc_ab (collection->num_glyphs, sizeof (unsigned long));
@ -417,6 +424,12 @@ _cairo_sub_font_collect (void *entry, void *closure)
if (subset.to_unicode != NULL)
free (subset.to_unicode);
if (subset.glyph_names != NULL) {
for (j = 0; j < collection->num_glyphs; j++)
free (subset.glyph_names[j]);
free (subset.glyph_names);
}
if (collection->status)
break;
}
@ -686,3 +699,149 @@ _cairo_scaled_font_subsets_foreach_unscaled (cairo_scaled_font_subsets_t *fo
closure,
FALSE);
}
/* Key-equality callback for the glyph-name hash table.
 *
 * Both arguments are cairo_string_entry_t pointers; the entries compare
 * equal when their NUL-terminated strings have identical contents.
 * Returns TRUE on a match and FALSE otherwise. */
static cairo_bool_t
_cairo_string_equal (const void *key_a, const void *key_b)
{
    const cairo_string_entry_t *lhs = key_a;
    const cairo_string_entry_t *rhs = key_b;

    return strcmp (lhs->string, rhs->string) ? FALSE : TRUE;
}
/* Initialise @key so it can be used to look up or insert the string @s.
 *
 * The hash is the plain sum of the characters of @s. @key keeps the
 * pointer @s itself (no copy is made), so @s must stay valid for as long
 * as @key is in use.
 *
 * The string is walked once up to its terminating NUL instead of calling
 * strlen() in the loop condition on every iteration; the resulting hash
 * value is unchanged. */
static void
_cairo_string_init_key (cairo_string_entry_t *key, char *s)
{
    unsigned long sum = 0;
    const char *p;

    for (p = s; *p != '\0'; p++)
        sum += *p;

    key->base.hash = sum;
    key->string = s;
}
/* Allocate a hash table entry keyed by the string @s.
 *
 * The entry refers to @s directly rather than copying it. Returns the
 * new entry, or NULL (after reporting CAIRO_STATUS_NO_MEMORY through
 * _cairo_error_throw) when the allocation fails. The caller releases the
 * entry with free(). */
static cairo_string_entry_t *
create_string_entry (char *s)
{
    cairo_string_entry_t *new_entry = malloc (sizeof (cairo_string_entry_t));

    if (new_entry != NULL) {
        _cairo_string_init_key (new_entry, s);
        return new_entry;
    }

    _cairo_error_throw (CAIRO_STATUS_NO_MEMORY);
    return NULL;
}
/* Build subset->glyph_names: an array of subset->num_glyphs heap-allocated
 * glyph name strings.
 *
 * Glyph 0 is always named ".notdef". Every other glyph whose to_unicode
 * value fits in 16 bits is named "uniXXXX" (XXXX = upper-case hex of that
 * value), unless that name has already been given to an earlier glyph.
 * Glyphs with a larger to_unicode value, or with a clashing name, fall
 * back to "g<index>". A temporary hash table of the names handed out so
 * far is used to detect clashes; it is always torn down before returning.
 *
 * Returns CAIRO_STATUS_SUCCESS on success, CAIRO_INT_STATUS_UNSUPPORTED
 * when subset->to_unicode is NULL or no glyph-to-unicode mapping is
 * available, and CAIRO_STATUS_NO_MEMORY (or the hash table's insert
 * status) on failure. On any failure subset->glyph_names is left NULL.
 *
 * NOTE(review): assumes subset->num_glyphs >= 1 (slot 0 is written
 * unconditionally) -- confirm against callers. */
cairo_int_status_t
_cairo_scaled_font_subset_create_glyph_names (cairo_scaled_font_subset_t *subset)
{
    const cairo_scaled_font_backend_t *backend;
    unsigned int i;
    cairo_status_t status;
    cairo_hash_table_t *names;
    cairo_string_entry_t key, *entry;
    char buf[30];

    if (subset->to_unicode == NULL)
        return CAIRO_INT_STATUS_UNSUPPORTED;

    /* Prefer the TrueType mapping; fall back to the font backend. */
    if (_cairo_truetype_create_glyph_to_unicode_map (subset) != CAIRO_STATUS_SUCCESS) {
        backend = subset->scaled_font->backend;
        if (backend->map_glyphs_to_unicode == NULL)
            return CAIRO_INT_STATUS_UNSUPPORTED;

        backend->map_glyphs_to_unicode (subset->scaled_font, subset);
    }

    /* calloc so that unfilled slots are NULL and safe to free on error. */
    subset->glyph_names = calloc (subset->num_glyphs, sizeof (char *));
    if (subset->glyph_names == NULL) {
        status = CAIRO_STATUS_NO_MEMORY;
        goto FAIL1;
    }

    names = _cairo_hash_table_create (_cairo_string_equal);
    if (names == NULL) {
        status = CAIRO_STATUS_NO_MEMORY;
        goto FAIL1;
    }

    /* From here on every exit goes through CLEANUP_HASH so the hash
     * table is never leaked. */
    subset->glyph_names[0] = strdup (".notdef");
    if (subset->glyph_names[0] == NULL) {
        status = CAIRO_STATUS_NO_MEMORY;
        goto CLEANUP_HASH;
    }

    entry = create_string_entry (subset->glyph_names[0]);
    if (entry == NULL) {
        status = CAIRO_STATUS_NO_MEMORY;
        goto CLEANUP_HASH;
    }

    status = _cairo_hash_table_insert (names, &entry->base);
    if (status) {
        free (entry);
        goto CLEANUP_HASH;
    }

    /* Slot 0 already holds ".notdef"; start at 1 so it is neither
     * overwritten nor leaked. */
    for (i = 1; i < subset->num_glyphs; i++) {
        if (subset->to_unicode[i] <= 0xffff) {
            snprintf (buf, sizeof (buf), "uni%04X", (unsigned int) (subset->to_unicode[i]));
            _cairo_string_init_key (&key, buf);
            /* Name already taken by an earlier glyph: use the index form. */
            if (_cairo_hash_table_lookup (names, &key.base,
                                          (cairo_hash_entry_t **) &entry)) {
                snprintf (buf, sizeof (buf), "g%u", i);
            }
        } else {
            snprintf (buf, sizeof (buf), "g%u", i);
        }

        subset->glyph_names[i] = strdup (buf);
        if (subset->glyph_names[i] == NULL) {
            status = CAIRO_STATUS_NO_MEMORY;
            goto CLEANUP_HASH;
        }

        entry = create_string_entry (subset->glyph_names[i]);
        if (entry == NULL) {
            status = CAIRO_STATUS_NO_MEMORY;
            goto CLEANUP_HASH;
        }

        status = _cairo_hash_table_insert (names, &entry->base);
        if (status) {
            free (entry);
            goto CLEANUP_HASH;
        }
    }

    /* Success falls through: the hash table is only scratch state. The
     * entries borrow the glyph name strings, so only the entries
     * themselves are freed here. */
CLEANUP_HASH:
    while (1) {
        entry = _cairo_hash_table_random_entry (names, NULL);
        if (entry == NULL)
            break;

        _cairo_hash_table_remove (names, (cairo_hash_entry_t *) entry);
        free (entry);
    }
    _cairo_hash_table_destroy (names);

    if (status == CAIRO_STATUS_SUCCESS)
        return status;

    /* Failure: release whatever names were created so far. */
    for (i = 0; i < subset->num_glyphs; i++)
        free (subset->glyph_names[i]);

FAIL1:
    free (subset->glyph_names);
    subset->glyph_names = NULL;

    return status;
}

View file

@ -465,7 +465,13 @@ cairo_type1_font_write_charstrings (cairo_type1_font_t *font,
goto fail;
charstring_encrypt (&data);
length = _cairo_array_num_elements (&data);
_cairo_output_stream_printf (encrypted_output, "/g%d %d RD ", i, length);
if (font->scaled_font_subset->glyph_names != NULL) {
_cairo_output_stream_printf (encrypted_output, "/%s %d RD ",
font->scaled_font_subset->glyph_names[i],
length);
} else {
_cairo_output_stream_printf (encrypted_output, "/g%d %d RD ", i, length);
}
_cairo_output_stream_write (encrypted_output,
_cairo_array_index (&data, 0),
length);
@ -527,8 +533,14 @@ cairo_type1_font_write_header (cairo_type1_font_t *font,
"} readonly def\n"
"/Encoding 256 array\n"
"0 1 255 {1 index exch /.notdef put} for\n");
for (i = 0; i < font->scaled_font_subset->num_glyphs; i++)
_cairo_output_stream_printf (font->output, "dup %d /g%d put\n", i, i);
for (i = 1; i < font->scaled_font_subset->num_glyphs; i++) {
if (font->scaled_font_subset->glyph_names != NULL) {
_cairo_output_stream_printf (font->output, "dup %d /%s put\n",
i, font->scaled_font_subset->glyph_names[i]);
} else {
_cairo_output_stream_printf (font->output, "dup %d /g%d put\n", i, i);
}
}
_cairo_output_stream_printf (font->output,
"readonly def\n"
"currentdict end\n"

View file

@ -459,6 +459,7 @@ typedef struct _cairo_scaled_font_subset {
*/
unsigned long *glyphs;
unsigned long *to_unicode;
char **glyph_names;
unsigned int num_glyphs;
cairo_bool_t is_composite;
} cairo_scaled_font_subset_t;