Package turbogears :: Package command :: Module i18n

Source Code for Module turbogears.command.i18n

  1  # test-file: test_command_i18n.py 
  2   
  3  """Command-line user interface for i18n administration.""" 
  4   
  5  import re 
  6  import glob 
  7  import os 
  8  import atexit 
  9  import optparse 
 10  import tempfile 
 11   
 12  from pkg_resources import resource_filename 
 13  import formencode 
 14  # XXX: we need to make kid-support pluggable or such. 
 15  try: 
 16      import kid 
 17  except ImportError: 
 18      kid = None 
 19   
 20  from turbogears import config 
 21  from turbogears.i18n.pygettext import pygettext, msgfmt, catalog 
 22  import turbogears.i18n.utils as i18n_utils 
 23  from turbogears.command.base import silent_os_remove 
 24  from turbogears.util import load_project_config, get_package_name 
 25   
 26   
class ProgramError(StandardError):
    """General application error raised by the i18n command-line tool.

    In this module it is raised when the configured locale path exists but
    is not a directory, and when pygettext did not produce a .pot file.

    """


def copy_file(src, dest):
    """Copy the contents of the file ``src`` to ``dest`` in binary mode.

    An already existing ``dest`` is removed first, so the copy always ends
    up in a freshly created file.  ``src`` is opened *before* ``dest`` is
    removed, so an unreadable or missing source does not destroy an existing
    destination.  The data is streamed in chunks and both file handles are
    closed even when copying fails.

    Raises IOError/OSError if ``src`` cannot be read or ``dest`` cannot be
    removed or written.

    """
    # Local import: shutil is not among the module-level imports.
    import shutil

    source = open(src, 'rb')
    try:
        if os.path.exists(dest):
            os.remove(dest)
        target = open(dest, 'wb')
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()


# One quoted string literal.  Group 1 holds the body of a single-quoted
# literal, group 2 the body of a double-quoted one; only one of them is set.
_str_literal = r"""(?:'((?:[^']|\\')*)'|"((?:[^"]|\\")*)")"""

# A ``_('...')`` call as written in Python code; the literal may carry an
# optional ``u``/``U`` and ``r``/``R`` prefix.
_py_i18n_re = re.compile(
    r"\b_\s*\(\s*[uU]?[rR]?%s\s*\)" % _str_literal
)

# A ``_('...')`` call as written in JavaScript code (no string prefixes).
_js_i18n_re = re.compile(
    r"\b_\s*\(\s*%s\s*\)" % _str_literal
)


class InternationalizationTool(object):
    """Manages i18n data via command-line interface.

    Contributed to TurboGears by Max Ischenko (http://maxischenko.in.ua).

    The commands understood by ``run`` are ``add``, ``collect``, ``merge``,
    ``compile``, ``create_js_messages`` and ``clean``.

    """

    desc = "Manage i18n data"
    need_project = True
    # Handed to the module-level load_project_config() if ``load_config``.
    config = None
    load_config = True
    # Defaults; replaced by the ``i18n.locale_dir`` and ``i18n.domain``
    # configuration settings in load_project_config() when those are set.
    locale_dir = 'locales'
    domain = 'messages'
    # Names of directories that are skipped when scanning for source files.
    ignore_dirs = ('cvs', 'sqlobject-history')

    name = None
    package = None

    def __init__(self, version):
        """Create the option parser for the ``i18n`` command.

        ``version`` is the version string shown by the parser's
        ``--version`` option (prefixed with the program name).

        """
        parser = optparse.OptionParser(usage="""
%prog i18n [options] <command>

Available commands:
  add <locale>        Creates a message catalog for specified locale
  collect             Scan source files to gather translatable strings in a .pot file
  merge               Sync message catalog in different languages with .pot file
  compile             Compile message catalog (.po -> .mo)
  create_js_messages  Create message catalogs for JS usage
  clean               Delete backups and compiled files
""", version="%prog " + version)
        parser.add_option("-f", "--force", default=False,
            action="store_true", dest="force_ops",
            help="Force potentially damaging actions")
        parser.add_option("-a", "--ascii", default=False,
            action="store_true", dest="ascii_output",
            help="Escape non-ascii characters (.py files only)")
        parser.add_option("-K", "--no-kid-support", default=True,
            action="store_false", dest="kid_support",
            help="Do not extract messages from Kid templates")
        parser.add_option("-G", "--no-genshi-support", default=True,
            action="store_false", dest="genshi_support",
            help="Do not extract messages from Genshi templates")
        parser.add_option("", "--template-dir", default='templates',
            action="store", dest="template_dir",
            help="The name of directories containing templates")
        parser.add_option("", "--src-dir", default=None,
            action="store", dest="source_dir",
            help="Directory that contains source files")
        # The flag switches JS extraction *off*; the help text used to
        # claim the opposite.
        parser.add_option("", "--no-js-support", default=True,
            action="store_false", dest="js_support",
            help="Do not extract messages from js-files.")
        parser.add_option("", "--js-base-dir",
            action="store", dest="js_base_dir",
            default="static/javascript",
            help="Base directory of javascript files"
                " for generated message-files.")
        parser.add_option("-e", "--js-encoding",
            dest="js_encoding", default="utf-8", metavar="ENCODING",
            help="Encoding of JavaScript source files (default: UTF-8)")
        self.parser = parser

    def load_project_config(self):
        """Load the project configuration and prepare the locale directory.

        If ``load_config`` is set, the module-level ``load_project_config``
        is called with ``self.config``.  The settings ``i18n.locale_dir``
        and ``i18n.domain`` override the class defaults when present.  The
        locale directory is created if it does not exist yet.

        Raises ProgramError if the locale path exists but is not a
        directory.

        """
        # check whether user specified custom settings
        if self.load_config:
            load_project_config(self.config)

        locale_dir = config.get('i18n.locale_dir')
        if locale_dir:
            self.locale_dir = locale_dir
            print("Use %s as a locale directory" % self.locale_dir)
        domain = config.get('i18n.domain')
        if domain:
            self.domain = domain
            print("Use %s as a message domain" % self.domain)

        if os.path.isdir(self.locale_dir):
            return
        if os.path.exists(self.locale_dir):
            raise ProgramError("%s is not a directory" % self.locale_dir)
        os.makedirs(self.locale_dir)

    def parse_args(self):
        """Parse the command line and return ``(options, args)``."""
        return self.parser.parse_args()

    def run(self):
        """Load the configuration, parse the command line and dispatch.

        The first positional argument selects the command; the remaining
        ones are only used by ``add`` (as locale codes).  A missing or
        unknown command is reported through ``parser.error``.

        """
        self.load_project_config()
        options, args = self.parse_args()
        if not args:
            self.parser.error("No command specified")
        self.options = options
        command, args = args[0], args[1:]
        if command == 'add':
            self.add_languages(args)
            return
        handlers = {
            'collect': self.scan_source_files,
            'compile': self.compile_message_catalogs,
            'merge': self.merge_message_catalogs,
            'clean': self.clean_generated_files,
            'create_js_messages': self.create_js_messages,
        }
        handler = handlers.get(command)
        if handler is None:
            self.parser.error("Command not recognized")
        else:
            handler()

    def _replace_extension(self, path, new_ext):
        """Return ``path`` with its file extension replaced by ``new_ext``.

        Only the extension of the last path component is touched, unlike
        ``str.replace`` which would also rewrite a matching substring in a
        directory name (e.g. ``app.portal/messages.po``).

        """
        return os.path.splitext(path)[0] + new_ext

    def _is_ignored_dir(self, dirname):
        """Tell whether a directory name is hidden or in ``ignore_dirs``."""
        return dirname.startswith('.') or dirname in self.ignore_dirs

    def _walk_source_files(self, srcdir):
        """Yield ``(dirname, path, ext)`` for each file found below ``srcdir``.

        ``dirname`` is the base name of the directory containing the file,
        ``path`` the joined path and ``ext`` the file extension.  Hidden
        directories and those named in ``ignore_dirs`` are skipped together
        with everything below them.

        """
        for root, dirs, files in os.walk(srcdir):
            # Prune in place so that os.walk does not descend into skipped
            # directories (previously only their direct files were skipped).
            dirs[:] = [d for d in dirs if not self._is_ignored_dir(d)]
            dirname = os.path.basename(root)
            if self._is_ignored_dir(dirname):
                # Can only apply to ``srcdir`` itself now.
                continue
            for fname in files:
                yield (dirname, os.path.join(root, fname),
                    os.path.splitext(fname)[1])

    def create_js_messages(self):
        """Write a ``messages-<locale>.js`` file for every known locale.

        The locales are derived from the existing message catalogs.  All
        ``.js`` files below the source directory are scanned for ``_()``
        calls and each key found is translated for the locale.  The result
        is written into ``<srcdir>/<js_base_dir>``.

        """
        self.load_project_config()
        # we assume the structure of messages is always
        # <self.locale_dir>/<lang>/LC_MESSAGES ...
        # to extract the languages known to the app
        prefix_len = len(self.locale_dir.split(os.sep))
        languages = [fname.split(os.sep)[prefix_len]
            for fname in self.list_message_catalogs()]
        srcdir = self.options.source_dir or get_package_name().split('.', 1)[0]
        if not languages:
            return

        # The keys do not depend on the locale, so scan the files only once.
        js_keys = []
        for _dirname, srcfile, ext in self._walk_source_files(srcdir):
            if ext == '.js':
                js_keys.extend([unicode(key)
                    for key in self.get_strings_in_js(srcfile)[0]])

        def escape(arg):
            # Keys are raw JS source text (escape sequences included), so
            # they are only wrapped in quotes, not escaped again.
            if "'" in arg:
                return '"%s"' % arg
            return "'%s'" % arg

        header_template = """
if (typeof(MESSAGES) == 'undefined') {
    MESSAGES = {};
}

LANG = '%s';
_messages = [
"""
        footer = """
];

for(var i in _messages) {
    MESSAGES[_messages[i][0]] = _messages[i][1];
}
"""
        for locale in languages:
            def gl():
                return locale
            # Force the locale used by the i18n utilities; note that this
            # replacement is not undone afterwards.
            i18n_utils._get_locale = gl
            # NOTE(review): ``_`` is not defined in this module; presumably
            # it is installed as a builtin by TurboGears -- confirm.
            messages = [(key, unicode(_(key, locale))) for key in js_keys]
            message_block = u',\n'.join(['[%s , %s]' % (escape(msgid),
                escape(msgstr)) for msgid, msgstr in messages]).encode('utf-8')
            message_block += '\n'
            outfilename = os.path.join(srcdir, self.options.js_base_dir,
                'messages-%s.js' % locale)
            print("Creating message file <%s>." % outfilename)
            mf = open(outfilename, 'w')
            try:
                mf.write(header_template % locale)
                mf.write(message_block)
                mf.write(footer)
            finally:
                mf.close()

    def clean_generated_files(self):
        """Remove the .pot backup and the compiled/backup catalog files."""
        silent_os_remove(
            self._replace_extension(self.get_potfile_path(), '.bak'))
        for fname in self.list_message_catalogs():
            silent_os_remove(self._replace_extension(fname, '.mo'))
            silent_os_remove(self._replace_extension(fname, '.back'))

    def merge_message_catalogs(self):
        """Merge the .pot file into all existing message catalogs."""
        catalog.merge(self.get_potfile_path(), self.list_message_catalogs())

    def compile_message_catalogs(self):
        """Compile every message catalog (.po) into a .mo file."""
        for fname in self.list_message_catalogs():
            dest = self._replace_extension(fname, '.mo')
            msgfmt.make(fname, dest)
            if os.path.exists(dest):
                print("Compiled %s OK" % fname)
            else:
                print("Compilation of %s failed!" % fname)

    def _copy_file_withcheck(self, sourcefile, targetfile):
        """Copy a file unless the target exists and ``--force`` is not set."""
        if os.path.exists(targetfile) and not self.options.force_ops:
            print("File %s exists, use --force to override" % targetfile)
            return
        copy_file(sourcefile, targetfile)
        print("Copy %s to %s" % (sourcefile, targetfile))

    def _copy_moduletranslation(self, sourcefile, targetdir, language):
        """Copy a module's translation file into ``targetdir`` if it exists.

        A missing ``sourcefile`` is only reported, not treated as an error.

        """
        modulefilename = os.path.basename(sourcefile)
        if not os.path.exists(sourcefile):
            print("%s translation for language '%s' does not exist"
                " (file searched '%s').\nPlease consider to contribute"
                " a translation." % (modulefilename, language, sourcefile))
            return
        targetfile = os.path.join(targetdir, modulefilename)
        self._copy_file_withcheck(sourcefile, targetfile)

    def add_languages(self, codes):
        """Create message catalogs for the given locale codes.

        For each code the catalog directory is created if necessary, the
        FormEncode and TurboGears translations are copied into it when
        available, and the .pot file is copied as the new catalog.  Nothing
        is done if the .pot file does not exist yet.

        """
        potfile = self.get_potfile_path()
        if not os.path.isfile(potfile):
            print("Run 'collect' first to create %s" % potfile)
            return
        for code in codes:
            catalog_file = self.get_locale_catalog(code)
            langdir = os.path.dirname(catalog_file)
            if not os.path.exists(langdir):
                os.makedirs(langdir)

            sourcefile_fe = os.path.join(formencode.api.get_localedir(), code,
                'LC_MESSAGES', 'FormEncode.mo')
            self._copy_moduletranslation(sourcefile_fe, langdir, code)

            basedir_i18n_tg = resource_filename('turbogears.i18n', 'data')
            sourcefile_tg = os.path.join(basedir_i18n_tg, code,
                'LC_MESSAGES', 'TurboGears.mo')
            self._copy_moduletranslation(sourcefile_tg, langdir, code)

            self._copy_file_withcheck(potfile, catalog_file)

    def scan_source_files(self):
        """Collect translatable strings from the sources into the .pot file.

        Python files are processed by pygettext; Kid, Genshi and JavaScript
        files are handled by the corresponding ``scan_*_files`` methods
        unless disabled by an option.  Templates are only considered inside
        directories named like the ``--template-dir`` option (if that is not
        empty).  An existing .pot file is kept as a ``.bak`` backup.

        Raises ProgramError if pygettext did not leave a .pot file behind.

        """
        source_files = []
        kid_files = []
        genshi_files = []
        js_files = []
        srcdir = self.options.source_dir or get_package_name().split('.', 1)[0]
        tmpldir = self.options.template_dir
        print("Scanning source directory %s" % srcdir)
        for dirname, srcfile, ext in self._walk_source_files(srcdir):
            in_template_dir = not tmpldir or dirname == tmpldir
            if ext == '.py':
                source_files.append(srcfile)
            elif ext == '.kid' and in_template_dir:
                kid_files.append(srcfile)
            elif ext == '.html' and in_template_dir:
                genshi_files.append(srcfile)
            elif ext == '.js':
                js_files.append(srcfile)

        # Work on a temporary .pot file inside the locale directory and
        # move it into place only at the very end.
        tmp_handle, tmp_potfile = tempfile.mkstemp(
            '.pot', 'tmp', self.locale_dir)
        os.close(tmp_handle)
        potbasename = os.path.basename(tmp_potfile)[:-4]
        pygettext_options = ['-v', '-d', potbasename,
            '-p', os.path.dirname(tmp_potfile)]
        if self.options.ascii_output:
            pygettext_options.insert(0, '-E')
        # pygettext reads its arguments from its own ``sys.argv``.
        pygettext.sys.argv = [''] + pygettext_options + source_files
        pygettext.main()
        if not os.path.exists(tmp_potfile):
            raise ProgramError("pygettext failed")
        atexit.register(silent_os_remove, tmp_potfile)

        if kid_files and self.options.kid_support:
            if not kid:
                print("Kid not installed, no support for Kid templates.")
            else:
                self.scan_kid_files(tmp_potfile, kid_files)

        if genshi_files and self.options.genshi_support:
            try:
                self.scan_genshi_files(tmp_potfile, genshi_files)
            except ImportError:
                print("Genshi not installed, no support for Genshi templates.")

        if js_files and self.options.js_support:
            self.scan_js_files(tmp_potfile, js_files)

        potfile = self.get_potfile_path()
        if os.path.isfile(potfile):
            bakfile = self._replace_extension(potfile, '.bak')
            silent_os_remove(bakfile)
            os.rename(potfile, bakfile)
            print("Backup existing file to %s" % bakfile)
        os.rename(tmp_potfile, potfile)
        print("Message templates written to %s" % potfile)

    def scan_genshi_files(self, potfile, files):
        """Extract strings from Genshi templates and write to pot file.

        Configuration settings:

        `genshi.i18n.ignore_tags` -- `['script', 'style']`
            List of element names. Content inside elements named in this list
            is not extracted as translatable text. Can be a space-separated
            string or a list of strings.
        `genshi.i18n.include_attrs` -- `['abbr', 'alt', 'label', 'prompt',
            'standby', 'summary', 'title']`
            List of attribute names. Only values of the attributes named in
            this list are extracted as translatable text. Can be a
            space-separated string or a list of strings.

        See http://genshi.edgewall.org/wiki/Documentation/0.5.x/i18n.html for
        more information.

        """
        extract_strings = pygettext.extract_genshi_strings
        messages = []
        options = {}
        for opt in ('include_attrs', 'ignore_tags'):
            val = config.get('genshi.i18n.' + opt)
            if val:
                options[opt] = val

        for fname in files:
            print("Working on %s" % fname)
            for msg in extract_strings(fname, options or None):
                # Genshi 0.6 adds comments as additional 4th element of msg,
                # so we take care to stay compatible
                lineno, text = msg[0], msg[2]
                if not text:
                    continue
                if isinstance(text, tuple):  # e.g. for ngettext
                    subtexts = text
                else:
                    subtexts = (text,)
                for subtext in subtexts:
                    if subtext:
                        messages.append((lineno, fname, subtext))

        self._write_potfile_entries(potfile, messages)

    def scan_kid_files(self, potfile, files):
        """Extract strings from Kid templates and write to pot file.

        Attribute values are only extracted when they contain a ``_()``
        call; plain element text is extracted as a whole unless it mixes
        text with Kid expressions.  Elements with a ``lang`` attribute and
        ``script``/``style`` elements are skipped.  Each key is recorded
        only once across all files; templates that cannot be parsed are
        reported and skipped.

        """
        # XXX: This duplicates functionality already present in
        # turbogears.i18n.pygettext! This should be refactored.

        # Local import: sys is not among the module-level imports.
        import sys

        messages = []
        tags_to_ignore = ['script', 'style']
        keys = set()

        def process_text(is_attribute, k, tag, fname):
            key = None
            s = _py_i18n_re.search(k)
            if s:
                key = (s.group(1) or s.group(2) or '').strip()
            elif not is_attribute:
                # we don't have a kid expression in there, so it is
                # "just" a text entry - which we want to be translated!
                import kid.codewriter as cw
                parts = cw.interpolate(k)
                if isinstance(parts, list) and len(parts) > 1:
                    print("Warning: Mixed content in tag <%s>: %s" % (tag, k))
                elif isinstance(parts, basestring):
                    key = k.strip()
            if key and key not in keys and tag not in tags_to_ignore:
                # the tag takes the place of the line number here
                messages.append((tag, fname, key))
                keys.add(key)

        for fname in files:
            print("Working on %s" % fname)
            fh = open(fname)
            try:
                try:
                    tree = kid.document(fh)
                except Exception:
                    print("Skip %s: %s" % (fname, sys.exc_info()[1]))
                    continue
                sentinel = None
                tag = None
                for ev, el in tree:
                    if ev == kid.parser.START:
                        if not isinstance(el.tag, unicode):
                            # skip comments, processing instructions etc.
                            continue
                        if el.get('lang', None) is not None:
                            # if we have a lang-attribute, ignore this
                            # node AND all its descendants.
                            sentinel = el
                            continue
                        # set the tag from the current one,
                        # dropping a leading {namespace} part.
                        tag = re.sub(r'({[^}]+})?(\w+)', r'\2', el.tag)
                        if tag in ('script', 'style'):
                            # skip JavaScript, CSS etc.
                            sentinel = el
                            continue
                        # process the attribute texts
                        for attrib_text in el.attrib.values():
                            process_text(True, attrib_text, tag, fname)
                    elif ev == kid.parser.END:
                        if el is sentinel:
                            sentinel = None
                    elif ev == kid.parser.TEXT:
                        if sentinel is None and el.strip():
                            process_text(False, el, tag, fname)
            finally:
                # close the template even if parsing fails half-way
                fh.close()
        self._write_potfile_entries(potfile, messages)

    def get_strings_in_js(self, fname):
        """Find the ``_()`` message keys used in a JavaScript file.

        Each line is decoded using the ``--js-encoding`` option and then
        searched for ``_('...')`` calls.  Returns a tuple ``(keys,
        messages)`` where ``keys`` lists the distinct keys in order of
        appearance and ``messages`` holds a ``(lineno, fname, key)`` entry
        for the first occurrence of each key (line numbers start at 1).

        """
        messages = []
        keys = []
        seen = set()
        encoding = self.options.js_encoding
        # Read bytes and decode explicitly, then search the decoded text
        # only.  Searching the raw line with offsets taken from the decoded
        # one goes wrong as soon as a line contains multi-byte characters.
        fh = open(fname, 'rb')
        try:
            for index, raw_line in enumerate(fh):
                line = raw_line.decode(encoding)
                for match in _js_i18n_re.finditer(line):
                    key = match.group(1) or match.group(2)
                    if key and key not in seen:
                        seen.add(key)
                        keys.append(key)
                        messages.append((index + 1, fname, key))
        finally:
            fh.close()
        return keys, messages

    def scan_js_files(self, potfile, files):
        """Extract strings from JavaScript files and write to pot file."""
        messages = []
        for fname in files:
            print("Working on %s" % fname)
            messages.extend(self.get_strings_in_js(fname)[1])
        self._write_potfile_entries(potfile, messages)

    def _write_potfile_entries(self, potfile, messages):
        """Append entries for the given messages to the .pot file.

        ``messages`` is a sequence of ``(linenumber, fname, text)`` tuples;
        for entries coming from Kid templates the first element is the tag
        name instead of a line number.  Entries with empty text are skipped
        and nothing is written at all if ``messages`` is empty.

        """
        if not messages:
            return
        fd = open(potfile, 'at+')
        try:
            for linenumber, fname, text in messages:
                if not text:
                    continue
                text = catalog.normalize(text.encode('utf-8'))
                fd.write('#: %s:%s\n' % (fname, linenumber))
                fd.write('msgid %s\n' % text)
                fd.write('msgstr ""\n\n')
        finally:
            fd.close()

    def get_potfile_path(self):
        """Return the path of the .pot file: ``<locale_dir>/<domain>.pot``."""
        return os.path.join(self.locale_dir, '%s.pot' % self.domain)

    def get_locale_catalog(self, code):
        """Return the path of the message catalog for the locale ``code``.

        This is ``<locale_dir>/<code>/LC_MESSAGES/<domain>.po``.

        """
        return os.path.join(self.locale_dir, code, 'LC_MESSAGES',
            '%s.po' % self.domain)

    def list_message_catalogs(self):
        """Return the paths of all existing message catalogs (.po files).

        Every directory directly below the locale directory is checked for
        a ``LC_MESSAGES/<domain>.po`` file.

        """
        files = []
        for name in glob.glob(self.locale_dir + '/*'):
            if not os.path.isdir(name):
                continue
            fname = os.path.join(name, 'LC_MESSAGES', '%s.po' % self.domain)
            if os.path.isfile(fname):
                files.append(fname)
        return files

    def fix_tzinfo(self, potfile):
        """Fix tzinfo.

        In certain environments, tzinfo as formatted by strftime() is not
        utf-8, e.g. Windows XP with Russian MUI.

        This leads to an error later on when a program tries to read the
        catalog.  The file is rewritten in place with the timezone part of
        the ``POT-Creation-Date`` header removed if it is not valid utf-8.

        """
        fh = open(potfile, 'rb')
        try:
            data = fh.read()
        finally:
            fh.close()

        def repl(m):
            """Remove tzinfo if it breaks encoding."""
            tzinfo = m.group(2)
            try:
                tzinfo.decode('utf-8')
            except UnicodeDecodeError:
                return m.group(1)  # cut tz info
            return m.group(0)  # leave unchanged

        data = re.sub(
            r"(POT-Creation-Date: [\d-]+ [0-9:]+)\+([^\\]+)", repl, data)
        fh = open(potfile, 'wb')
        try:
            fh.write(data)
        finally:
            fh.close()


def main():
    """Run the i18n tool as a stand-alone script.

    ``InternationalizationTool`` requires a version string for its option
    parser, so the version of the installed TurboGears distribution is
    looked up and passed in (calling the class without it raised a
    TypeError).

    """
    # Local import: only ``resource_filename`` is imported at module level.
    import pkg_resources

    try:
        # NOTE(review): assumes the distribution is registered under the
        # name 'TurboGears' -- confirm against the project's setup.py.
        version = pkg_resources.get_distribution('TurboGears').version
    except pkg_resources.DistributionNotFound:
        version = 'unknown'
    tool = InternationalizationTool(version)
    tool.run()


if __name__ == '__main__':
    main()