tesseract  3.05.01
tesseract::PangoFontInfo Class Reference

#include <pango_font_info.h>

Public Types

enum  FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE }
 

Public Member Functions

 PangoFontInfo ()
 
 ~PangoFontInfo ()
 
 PangoFontInfo (const string &name)
 
bool ParseFontDescriptionName (const string &name)
 
bool CoversUTF8Text (const char *utf8_text, int byte_length) const
 
int DropUncoveredChars (string *utf8_text) const
 
bool CanRenderString (const char *utf8_word, int len, std::vector< string > *graphemes) const
 
bool CanRenderString (const char *utf8_word, int len) const
 
bool GetSpacingProperties (const string &utf8_char, int *x_bearing, int *x_advance) const
 
string DescriptionName () const
 
const string & family_name () const
 
int font_size () const
 
bool is_bold () const
 
bool is_italic () const
 
bool is_smallcaps () const
 
bool is_monospace () const
 
bool is_fraktur () const
 
FontTypeEnum font_type () const
 
int resolution () const
 
void set_resolution (const int resolution)
 

Static Public Member Functions

static void SoftInitFontConfig ()
 
static void HardInitFontConfig (const string &fonts_dir, const string &cache_dir)
 

Friends

class FontUtils
 

Detailed Description

Definition at line 44 of file pango_font_info.h.

Member Enumeration Documentation

◆ FontTypeEnum

Constructor & Destructor Documentation

◆ PangoFontInfo() [1/2]

tesseract::PangoFontInfo::PangoFontInfo ( )

Definition at line 88 of file pango_font_info.cpp.

88  : desc_(NULL), resolution_(kDefaultResolution) {
89  Clear();
90 }
const int kDefaultResolution

◆ ~PangoFontInfo()

tesseract::PangoFontInfo::~PangoFontInfo ( )

Definition at line 114 of file pango_font_info.cpp.

114 { pango_font_description_free(desc_); }

◆ PangoFontInfo() [2/2]

tesseract::PangoFontInfo::PangoFontInfo ( const string &  name)
explicit

Definition at line 92 of file pango_font_info.cpp.

93  : desc_(NULL), resolution_(kDefaultResolution) {
94  if (!ParseFontDescriptionName(desc)) {
95  tprintf("ERROR: Could not parse %s\n", desc.c_str());
96  Clear();
97  }
98 }
bool ParseFontDescriptionName(const string &name)
const int kDefaultResolution
#define tprintf(...)
Definition: tprintf.h:31

Member Function Documentation

◆ CanRenderString() [1/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len,
std::vector< string > *  graphemes 
) const

Definition at line 393 of file pango_font_info.cpp.

394  {
395  if (graphemes) graphemes->clear();
396  // We check for font coverage of the text first, as otherwise Pango could
397  // (undesirably) fall back to another font that does have the required
398  // coverage.
399  if (!CoversUTF8Text(utf8_word, len)) {
400  return false;
401  }
402  // U+25CC dotted circle character that often (but not always) gets rendered
403  // when there is an illegal grapheme sequence.
404  const char32 kDottedCircleGlyph = 9676;
405  bool bad_glyph = false;
406  PangoFontMap* font_map = pango_cairo_font_map_get_default();
407  PangoContext* context = pango_context_new();
408  pango_context_set_font_map(context, font_map);
409  PangoLayout* layout;
410  {
411  // Pango is not releasing the cached layout.
413  layout = pango_layout_new(context);
414  }
415  if (desc_) {
416  pango_layout_set_font_description(layout, desc_);
417  } else {
418  PangoFontDescription *desc = pango_font_description_from_string(
419  DescriptionName().c_str());
420  pango_layout_set_font_description(layout, desc);
421  pango_font_description_free(desc);
422  }
423  pango_layout_set_text(layout, utf8_word, len);
424  PangoLayoutIter* run_iter = NULL;
425  { // Fontconfig caches some information here that is not freed before exit.
427  run_iter = pango_layout_get_iter(layout);
428  }
429  do {
430  PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
431  if (!run) {
432  tlog(2, "Found end of line NULL run marker\n");
433  continue;
434  }
435  PangoGlyph dotted_circle_glyph;
436  PangoFont* font = run->item->analysis.font;
437 
438 #ifdef _WIN32 // Fixme! Leaks memory and breaks unittests.
439  PangoGlyphString* glyphs = pango_glyph_string_new();
440  char s[] = "\xc2\xa7";
441  pango_shape(s, sizeof(s), &(run->item->analysis), glyphs);
442  dotted_circle_glyph = glyphs->glyphs[0].glyph;
443 #else
444  dotted_circle_glyph = pango_fc_font_get_glyph(
445  reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
446 #endif
447 
448  if (TLOG_IS_ON(2)) {
449  PangoFontDescription* desc = pango_font_describe(font);
450  char* desc_str = pango_font_description_to_string(desc);
451  tlog(2, "Desc of font in run: %s\n", desc_str);
452  g_free(desc_str);
453  pango_font_description_free(desc);
454  }
455 
456  PangoGlyphItemIter cluster_iter;
457  gboolean have_cluster;
458  for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
459  run, utf8_word);
460  have_cluster && !bad_glyph;
461  have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
462  const int start_byte_index = cluster_iter.start_index;
463  const int end_byte_index = cluster_iter.end_index;
464  int start_glyph_index = cluster_iter.start_glyph;
465  int end_glyph_index = cluster_iter.end_glyph;
466  string cluster_text = string(utf8_word + start_byte_index,
467  end_byte_index - start_byte_index);
468  if (graphemes) graphemes->push_back(cluster_text);
469  if (IsUTF8Whitespace(cluster_text.c_str())) {
470  tlog(2, "Skipping whitespace\n");
471  continue;
472  }
473  if (TLOG_IS_ON(2)) {
474  printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
475  start_byte_index, end_byte_index,
476  start_glyph_index, end_glyph_index);
477  }
478  for (int i = start_glyph_index,
479  step = (end_glyph_index > start_glyph_index) ? 1 : -1;
480  !bad_glyph && i != end_glyph_index; i+= step) {
481  const bool unknown_glyph =
482  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
483  PANGO_GLYPH_UNKNOWN_FLAG);
484  const bool illegal_glyph =
485  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
486  dotted_circle_glyph);
487  bad_glyph = unknown_glyph || illegal_glyph;
488  if (TLOG_IS_ON(2)) {
489  printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
490  bad_glyph ? 1 : 0);
491  }
492  }
493  if (TLOG_IS_ON(2)) {
494  printf(" '%s'\n", cluster_text.c_str());
495  }
496  if (bad_glyph)
497  tlog(1, "Found illegal glyph!\n");
498  }
499  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
500 
501  pango_layout_iter_free(run_iter);
502  g_object_unref(context);
503  g_object_unref(layout);
504  if (bad_glyph && graphemes) graphemes->clear();
505  return !bad_glyph;
506 }
#define tlog(level,...)
Definition: tlog.h:33
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:182
#define TLOG_IS_ON(level)
Definition: tlog.h:39
signed int char32
Definition: normstrngs.h:27
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:63

◆ CanRenderString() [2/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len 
) const

Definition at line 388 of file pango_font_info.cpp.

388  {
389  vector<string> graphemes;
390  return CanRenderString(utf8_word, len, &graphemes);
391 }
bool CanRenderString(const char *utf8_word, int len, std::vector< string > *graphemes) const

◆ CoversUTF8Text()

bool tesseract::PangoFontInfo::CoversUTF8Text ( const char *  utf8_text,
int  byte_length 
) const

Definition at line 268 of file pango_font_info.cpp.

268  {
269  PangoFont* font = ToPangoFont();
270  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
271  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
272  it != UNICHAR::end(utf8_text, byte_length);
273  ++it) {
274  if (IsWhitespace(*it) || pango_is_zero_width(*it))
275  continue;
276  if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
277  char tmp[5];
278  int len = it.get_utf8(tmp);
279  tmp[len] = '\0';
280  tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
281  return false;
282  }
283  }
284  return true;
285 }
#define tlog(level,...)
Definition: tlog.h:33
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:176
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:204
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:200

◆ DescriptionName()

string tesseract::PangoFontInfo::DescriptionName ( ) const

Definition at line 116 of file pango_font_info.cpp.

116  {
117  if (!desc_) return "";
118  char* desc_str = pango_font_description_to_string(desc_);
119  string desc_name(desc_str);
120  g_free(desc_str);
121  return desc_name;
122 }

◆ DropUncoveredChars()

int tesseract::PangoFontInfo::DropUncoveredChars ( string *  utf8_text) const

Definition at line 309 of file pango_font_info.cpp.

309  {
310  PangoFont* font = ToPangoFont();
311  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
312  int num_dropped_chars = 0;
313  // Maintain two iterators that point into the string. For space efficiency, we
314  // will repeatedly copy one covered UTF8 character from one to the other, and
315  // at the end resize the string to the right length.
316  char* out = const_cast<char*>(utf8_text->c_str());
317  const UNICHAR::const_iterator it_begin =
318  UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
319  const UNICHAR::const_iterator it_end =
320  UNICHAR::end(utf8_text->c_str(), utf8_text->length());
321  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
322  // Skip bad utf-8.
323  if (!it.is_legal()) {
324  ++it; // One suitable error message will still be issued.
325  continue;
326  }
327  int unicode = *it;
328  int utf8_len = it.utf8_len();
329  const char* utf8_char = it.utf8_data();
330  // Move it forward before the data gets modified.
331  ++it;
332  if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
333  pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
334  if (TLOG_IS_ON(2)) {
335  UNICHAR unichar(unicode);
336  char* str = unichar.utf8_str();
337  tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
338  delete[] str;
339  }
340  ++num_dropped_chars;
341  continue;
342  }
343  my_strnmove(out, utf8_char, utf8_len);
344  out += utf8_len;
345  }
346  utf8_text->resize(out - utf8_text->c_str());
347  return num_dropped_chars;
348 }
#define tlog(level,...)
Definition: tlog.h:33
#define TLOG_IS_ON(level)
Definition: tlog.h:39
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:176
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:204
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:200
int utf8_len() const
Definition: unichar.cpp:186

◆ family_name()

const string& tesseract::PangoFontInfo::family_name ( ) const
inline

Definition at line 105 of file pango_font_info.h.

105 { return family_name_; }

◆ font_size()

int tesseract::PangoFontInfo::font_size ( ) const
inline

Definition at line 107 of file pango_font_info.h.

107 { return font_size_; }

◆ font_type()

FontTypeEnum tesseract::PangoFontInfo::font_type ( ) const
inline

Definition at line 113 of file pango_font_info.h.

113 { return font_type_; }

◆ GetSpacingProperties()

bool tesseract::PangoFontInfo::GetSpacingProperties ( const string &  utf8_char,
int *  x_bearing,
int *  x_advance 
) const

Definition at line 350 of file pango_font_info.cpp.

351  {
352  // Convert to equivalent PangoFont structure
353  PangoFont* font = ToPangoFont();
354  // Find the glyph index in the font for the supplied utf8 character.
355  int total_advance = 0;
356  int min_bearing = 0;
357  // Handle multi-unicode strings by reporting the left-most position of the
358  // x-bearing, and right-most position of the x-advance if the string were to
359  // be rendered.
360  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(),
361  utf8_char.length());
362  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(),
363  utf8_char.length());
364  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
365  PangoGlyph glyph_index = pango_fc_font_get_glyph(
366  reinterpret_cast<PangoFcFont*>(font), *it);
367  if (!glyph_index) {
368  // Glyph for given unicode character doesn't exist in font.
369  return false;
370  }
371  // Find the ink glyph extents for the glyph
372  PangoRectangle ink_rect, logical_rect;
373  pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
374  pango_extents_to_pixels(&ink_rect, NULL);
375  pango_extents_to_pixels(&logical_rect, NULL);
376 
377  int bearing = total_advance + PANGO_LBEARING(ink_rect);
378  if (it == it_begin || bearing < min_bearing) {
379  min_bearing = bearing;
380  }
381  total_advance += PANGO_RBEARING(logical_rect);
382  }
383  *x_bearing = min_bearing;
384  *x_advance = total_advance;
385  return true;
386 }
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:204
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:200

◆ HardInitFontConfig()

void tesseract::PangoFontInfo::HardInitFontConfig ( const string &  fonts_dir,
const string &  cache_dir 
)
static

Definition at line 138 of file pango_font_info.cpp.

139  {
140  if (!cache_dir_.empty()) {
142  File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
143  }
144  const int MAX_FONTCONF_FILESIZE = 1024;
145  char fonts_conf_template[MAX_FONTCONF_FILESIZE];
146  cache_dir_ = cache_dir;
147  fonts_dir_ = fonts_dir;
148  snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
149  "<?xml version=\"1.0\"?>\n"
150  "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
151  "<fontconfig>\n"
152  "<dir>%s</dir>\n"
153  "<cachedir>%s</cachedir>\n"
154  "<config></config>\n"
155  "</fontconfig>",
156  fonts_dir.c_str(), cache_dir_.c_str());
157  string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
158  File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
159 #ifdef _WIN32
160  std::string env("FONTCONFIG_PATH=");
161  env.append(cache_dir_.c_str());
162  putenv(env.c_str());
163  putenv("LANG=en_US.utf8");
164 #else
165  setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
166  // Fix the locale so that the reported font names are consistent.
167  setenv("LANG", "en_US.utf8", true);
168 #endif // _WIN32
169 
170  if (FcInitReinitialize() != FcTrue) {
171  tprintf("FcInitiReinitialize failed!!\n");
172  }
174  // Clear Pango's font cache too.
175  pango_cairo_font_map_set_default(NULL);
176 }
static bool DeleteMatchingFiles(const char *pattern)
Definition: fileio.cpp:113
#define tprintf(...)
Definition: tprintf.h:31
static string JoinPath(const string &prefix, const string &suffix)
Definition: fileio.cpp:83
static void WriteStringToFileOrDie(const string &str, const string &filename)
Definition: fileio.cpp:53

◆ is_bold()

bool tesseract::PangoFontInfo::is_bold ( ) const
inline

Definition at line 108 of file pango_font_info.h.

108 { return is_bold_; }

◆ is_fraktur()

bool tesseract::PangoFontInfo::is_fraktur ( ) const
inline

Definition at line 112 of file pango_font_info.h.

112 { return is_fraktur_; }

◆ is_italic()

bool tesseract::PangoFontInfo::is_italic ( ) const
inline

Definition at line 109 of file pango_font_info.h.

109 { return is_italic_; }

◆ is_monospace()

bool tesseract::PangoFontInfo::is_monospace ( ) const
inline

Definition at line 111 of file pango_font_info.h.

111 { return is_monospace_; }

◆ is_smallcaps()

bool tesseract::PangoFontInfo::is_smallcaps ( ) const
inline

Definition at line 110 of file pango_font_info.h.

110 { return is_smallcaps_; }

◆ ParseFontDescriptionName()

bool tesseract::PangoFontInfo::ParseFontDescriptionName ( const string &  name)

Definition at line 243 of file pango_font_info.cpp.

243  {
244  PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
245  bool success = ParseFontDescription(desc);
246  pango_font_description_free(desc);
247  return success;
248 }

◆ resolution()

int tesseract::PangoFontInfo::resolution ( ) const
inline

Definition at line 115 of file pango_font_info.h.

115 { return resolution_; }

◆ set_resolution()

void tesseract::PangoFontInfo::set_resolution ( const int  resolution)
inline

Definition at line 116 of file pango_font_info.h.

116  {
117  resolution_ = resolution;
118  }

◆ SoftInitFontConfig()

void tesseract::PangoFontInfo::SoftInitFontConfig ( )
static

Definition at line 128 of file pango_font_info.cpp.

128  {
129  if (fonts_dir_.empty()) {
130  HardInitFontConfig(FLAGS_fonts_dir.c_str(),
131  FLAGS_fontconfig_tmpdir.c_str());
132  }
133 }
static void HardInitFontConfig(const string &fonts_dir, const string &cache_dir)

Friends And Related Function Documentation

◆ FontUtils

friend class FontUtils
friend

Definition at line 121 of file pango_font_info.h.


The documentation for this class was generated from the following files: