Package turbogears :: Package command :: Module i18n

Source Code for Module turbogears.command.i18n

  1  # test-file: test_command_i18n.py 
  2   
  3  """Command-line user interface for i18n administration.""" 
  4   
  5  import re 
  6  import glob 
  7  import os 
  8  import os.path 
  9  import atexit 
 10  import optparse 
 11  import tempfile 
 12   
 13  from pkg_resources import resource_filename 
 14  import formencode 
 15  # XXX: we need to make kid-support pluggable or such. 
 16  try: 
 17      import kid 
 18  except ImportError: 
 19      kid = None 
 20   
 21  import turbogears 
 22  import turbogears.i18n 
 23  from turbogears import config 
 24  from turbogears.i18n.pygettext import pygettext, msgfmt, catalog 
 25  from turbogears.command.base import silent_os_remove 
 26  from turbogears.util import load_project_config, get_package_name 
 27   
 28   
class ProgramError(StandardError):
    """Signals about a general application error.

    Within this module it is raised when the configured locale directory
    path exists but is not a directory, and when no .pot file is present
    after running pygettext.

    """


def copy_file(src, dest):
    """Copy the contents of file `src` to `dest`, replacing `dest`.

    The source is read completely before the destination is touched, so a
    missing or unreadable source leaves an existing destination intact.
    An existing destination is removed (not truncated) before the new file
    is written.

    Raises IOError/OSError if `src` cannot be read or `dest` cannot be
    removed or written.

    """
    fh = open(src, 'rb')
    try:
        data = fh.read()
    finally:
        fh.close()
    # Only now that we hold the data is it safe to drop the old target.
    if os.path.exists(dest):
        os.remove(dest)
    fh = open(dest, 'wb')
    try:
        fh.write(data)
    finally:
        fh.close()


# Regex fragment matching one quoted string literal. Group 1 captures the
# body of a single-quoted literal, group 2 the body of a double-quoted one;
# exactly one of the two groups is set for any match.
_str_literal = r"""(?:'((?:[^']|\\')*)'|"((?:[^"]|\\")*)")"""
# Matches a call of the form _('...') with an optional u/U and r/R string
# prefix, as used in Python expressions.
_py_i18n_re = re.compile(r"\b_\s*\(\s*[uU]?[rR]?%s\s*\)" % _str_literal)
# Matches a call of the form _('...') without string prefixes, as used in
# JavaScript sources.
_js_i18n_re = re.compile(r"\b_\s*\(\s*%s\s*\)" % _str_literal)


class InternationalizationTool(object):
    """Manages i18n data via command-line interface.

    Contributed to TurboGears by Max Ischenko (http://maxischenko.in.ua).

    ``run()`` loads the project configuration, parses the command line and
    dispatches on the first positional argument to one of: ``collect``,
    ``add``, ``compile``, ``merge``, ``clean`` or ``create_js_messages``.

    """

    desc = "Manage i18n data"
    need_project = True
    # Passed to turbogears.util.load_project_config() when load_config is set.
    config = None
    load_config = True
    # Defaults; overridden by the i18n.locale_dir / i18n.domain settings.
    locale_dir = 'locales'
    domain = 'messages'
    # Directory names that are never scanned for source files.
    ignore_dirs = ('cvs', 'sqlobject-history')

    name = None
    package = None

    def __init__(self, version):
        """Build the option parser; `version` is shown by ``--version``."""
        parser = optparse.OptionParser(usage="""
%prog i18n [options] <command>

Available commands:
  add <locale>        Creates a message catalog for specified locale
  collect             Scan source files to gather translatable strings in a .pot file
  merge               Sync message catalog in different languages with .pot file
  compile             Compile message catalog (.po -> .mo)
  create_js_messages  Create message catalogs for JS usage
  clean               Delete backups and compiled files
""", version="%prog " + version)
        parser.add_option("-f", "--force", default=False,
            action="store_true", dest="force_ops",
            help="Force potentially damaging actions")
        parser.add_option("-a", "--ascii", default=False,
            action="store_true", dest="ascii_output",
            help="Escape non-ascii characters (.py files only)")
        parser.add_option("-K", "--no-kid-support", default=True,
            action="store_false", dest="kid_support",
            help="Do not extract messages from Kid templates")
        parser.add_option("-G", "--no-genshi-support", default=True,
            action="store_false", dest="genshi_support",
            help="Do not extract messages from Genshi templates")
        parser.add_option("", "--template-dir", default='templates',
            action="store", dest="template_dir",
            help="The name of directories containing templates")
        parser.add_option("", "--src-dir", default=None,
            action="store", dest="source_dir",
            help="Directory that contains source files")
        # The option switches JS extraction *off*; the help text used to
        # claim the opposite.
        parser.add_option("", "--no-js-support", default=True,
            action="store_false", dest="js_support",
            help="Do not extract messages from js-files.")
        parser.add_option("", "--js-base-dir",
            action="store", dest="js_base_dir",
            default="static/javascript",
            help="Base directory of javascript files"
                " for generated message-files.")
        parser.add_option("-e", "--js-encoding",
            dest="js_encoding", default="utf-8", metavar="ENCODING",
            help="Encoding of JavaScript source files (default: UTF-8)")
        parser.set_defaults(js_support=True)
        self.parser = parser

    def load_project_config(self):
        """Choose the config file.

        Try to guess whether this is a development or installed project.

        Afterwards the ``i18n.locale_dir`` and ``i18n.domain`` settings, if
        set, replace the class defaults, and the locale directory is created
        when it does not exist yet.

        Raises ProgramError if the locale directory path exists but is not
        a directory.

        """
        # check whether user specified custom settings
        if self.load_config:
            load_project_config(self.config)

        if config.get("i18n.locale_dir"):
            self.locale_dir = config.get("i18n.locale_dir")
            print("Use %s as a locale directory" % self.locale_dir)
        if config.get('i18n.domain'):
            self.domain = config.get('i18n.domain')
            print("Use %s as a message domain" % self.domain)

        if os.path.exists(self.locale_dir) \
                and not os.path.isdir(self.locale_dir):
            raise ProgramError(
                "%s is not a directory" % self.locale_dir)

        if not os.path.exists(self.locale_dir):
            os.makedirs(self.locale_dir)

    def parse_args(self):
        """Parse the process command line; return ``(options, args)``."""
        return self.parser.parse_args()

    def run(self):
        """Load the configuration and execute the requested sub-command.

        Exits through the option parser's error handling when no command or
        an unknown command is given.

        """
        self.load_project_config()
        options, args = self.parse_args()
        if not args:
            self.parser.error("No command specified")
        self.options = options
        command, args = args[0], args[1:]
        if 'collect' == command:
            self.scan_source_files()
        elif 'add' == command:
            self.add_languages(args)
        elif 'compile' == command:
            self.compile_message_catalogs()
        elif 'merge' == command:
            self.merge_message_catalogs()
        elif 'clean' == command:
            self.clean_generated_files()
        elif 'create_js_messages' == command:
            self.create_js_messages()
        else:
            self.parser.error("Command not recognized")

    def _replace_ext(self, path, new_ext):
        """Return `path` with its file extension replaced by `new_ext`.

        Only the extension of the last path component is touched, so a
        directory or domain name that happens to contain '.po' is left
        alone (unlike a plain ``str.replace``).

        """
        return os.path.splitext(path)[0] + new_ext

    def _is_ignored_dir(self, dirname):
        """Tell whether a directory named `dirname` must not be scanned.

        Hidden directories (leading dot) and the names in `ignore_dirs` are
        ignored. The name is also compared in lower case so that the usual
        upper-case 'CVS' directory matches the 'cvs' entry.

        """
        return (dirname.startswith('.') or dirname in self.ignore_dirs
            or dirname.lower() in self.ignore_dirs)

    def _walk_source_tree(self, srcdir):
        """Yield ``(dirname, path)`` for every file to scan below `srcdir`.

        `dirname` is the base name of the directory containing the file.
        Ignored directories are pruned from the walk, so nothing below them
        is visited either.

        """
        for root, dirs, files in os.walk(srcdir):
            # Prune in place: os.walk only honours modifications of the
            # very list object it handed out.
            dirs[:] = [d for d in dirs if not self._is_ignored_dir(d)]
            dirname = os.path.basename(root)
            if self._is_ignored_dir(dirname):
                continue
            for fname in files:
                yield dirname, os.path.join(root, fname)

    def create_js_messages(self):
        """Write a ``messages-<locale>.js`` file for every known locale.

        The locales are derived from the existing message catalogs. For each
        one, all strings found in the project's .js files are translated and
        written as a JavaScript ``MESSAGES`` table below
        ``<srcdir>/<js_base_dir>``.

        The translation call uses the name ``_``, which is not defined in
        this module -- presumably it is installed as a builtin elsewhere in
        TurboGears; TODO confirm.

        """
        self.load_project_config()
        # we assume the structure of messages is always
        # <self.locale_dir>/<lang>/LC_MESSAGES ...
        # to extract the languages known to the app
        prefix_len = len(self.locale_dir.split(os.sep))
        languages = [fname.split(os.sep)[prefix_len]
            for fname in self.list_message_catalogs()]
        import turbogears.i18n.utils as utils
        srcdir = self.options.source_dir or get_package_name().split('.', 1)[0]
        # The set of source files does not depend on the locale.
        js_files = [path for dirname, path in self._walk_source_tree(srcdir)
            if os.path.splitext(path)[1] == '.js']

        def escape(arg):
            if "'" in arg:
                return '"%s"' % arg
            return "'%s'" % arg

        for locale in languages:
            def gl():
                return locale
            # Force the locale used by the translation machinery.
            utils._get_locale = gl
            messages = []
            for filename in js_files:
                for key in self.get_strings_in_js(filename)[0]:
                    key = unicode(key)
                    msg = unicode(_(key, locale))
                    messages.append((key, msg))
            header = """
if (typeof(MESSAGES) == "undefined") {
    MESSAGES = {};
}

LANG = '%s';
_messages = [
""" % locale
            footer = """
];

for(var i in _messages) {
    MESSAGES[_messages[i][0]] = _messages[i][1];
}
"""
            message_block = u',\n'.join(['[%s , %s]' % (escape(msgid),
                escape(msgstr)) for msgid, msgstr in messages]).encode('utf-8')
            message_block += '\n'
            outfilename = os.path.join(srcdir, self.options.js_base_dir,
                'messages-%s.js' % locale)
            print("Creating message file <%s>." % outfilename)
            mf = open(outfilename, "w")
            try:
                mf.write(header)
                mf.write(message_block)
                mf.write(footer)
            finally:
                mf.close()

    def clean_generated_files(self):
        """Delete the .pot backup and the .mo/.back files of all catalogs."""
        potfile = self.get_potfile_path()
        silent_os_remove(self._replace_ext(potfile, '.bak'))
        for fname in self.list_message_catalogs():
            silent_os_remove(self._replace_ext(fname, '.mo'))
            silent_os_remove(self._replace_ext(fname, '.back'))

    def merge_message_catalogs(self):
        """Merge the .pot file into every existing message catalog."""
        potfile = self.get_potfile_path()
        catalogs = self.list_message_catalogs()
        catalog.merge(potfile, catalogs)

    def compile_message_catalogs(self):
        """Compile every .po catalog to a .mo file and report the result."""
        for fname in self.list_message_catalogs():
            dest = self._replace_ext(fname, '.mo')
            msgfmt.make(fname, dest)
            if os.path.exists(dest):
                print("Compiled %s OK" % fname)
            else:
                print("Compilation of %s failed!" % fname)

    def _copy_file_withcheck(self, sourcefile, targetfile):
        """Copy a file, but overwrite an existing target only with --force."""
        if os.path.exists(targetfile) and not self.options.force_ops:
            print("File %s exists, use --force to override" % targetfile)
            return
        copy_file(sourcefile, targetfile)
        print("Copy %s to %s" % (sourcefile, targetfile))

    def _copy_moduletranslation(self, sourcefile, targetdir, language):
        """Copy a translation shipped with a library into `targetdir`.

        Prints a hint instead when the library has no translation for
        `language` (i.e. `sourcefile` does not exist).

        """
        modulefilename = os.path.basename(sourcefile)
        if os.path.exists(sourcefile):
            targetfile = os.path.join(targetdir, modulefilename)
            self._copy_file_withcheck(sourcefile, targetfile)
        else:
            print("%s translation for language '%s' does not exist"
                " (file searched '%s').\nPlease consider to contribute"
                " a translation." % (modulefilename, language, sourcefile))

    def add_languages(self, codes):
        """Create a message catalog for each locale code in `codes`.

        Requires the .pot file created by 'collect'. For every code the
        catalog directory is created, the FormEncode and TurboGears .mo
        files for that locale are copied in (when available) and the .pot
        file is copied to the new .po catalog.

        """
        potfile = self.get_potfile_path()
        if not os.path.isfile(potfile):
            print("Run 'collect' first to create %s" % potfile)
            return
        for code in codes:
            catalog_file = self.get_locale_catalog(code)
            langdir = os.path.dirname(catalog_file)
            if not os.path.exists(langdir):
                os.makedirs(langdir)

            sourcefile_fe = os.path.join(formencode.api.get_localedir(), code,
                'LC_MESSAGES', 'FormEncode.mo')
            self._copy_moduletranslation(sourcefile_fe, langdir, code)

            basedir_i18n_tg = resource_filename('turbogears.i18n', 'data')
            sourcefile_tg = os.path.join(basedir_i18n_tg, code,
                'LC_MESSAGES', 'TurboGears.mo')
            self._copy_moduletranslation(sourcefile_tg, langdir, code)

            self._copy_file_withcheck(potfile, catalog_file)

    def scan_source_files(self):
        """Collect translatable strings from the project into the .pot file.

        Python sources are processed by pygettext into a temporary .pot
        file; strings from Kid, Genshi and JavaScript files are appended to
        it. Finally an existing .pot file is renamed to .bak and the
        temporary file takes its place.

        Raises ProgramError if no temporary .pot file exists after running
        pygettext.

        """
        source_files = []
        kid_files = []
        genshi_files = []
        js_files = []
        srcdir = self.options.source_dir or get_package_name().split('.', 1)[0]
        tmpldir = self.options.template_dir
        print("Scanning source directory %s" % srcdir)
        for dirname, srcfile in self._walk_source_tree(srcdir):
            ext = os.path.splitext(srcfile)[1]
            # Templates only count inside a template directory, unless
            # no template directory name is configured at all.
            in_template_dir = not tmpldir or dirname == tmpldir
            if ext == '.py':
                source_files.append(srcfile)
            elif ext == '.kid' and in_template_dir:
                kid_files.append(srcfile)
            elif ext == '.html' and in_template_dir:
                genshi_files.append(srcfile)
            elif ext == '.js':
                js_files.append(srcfile)
        tmp_handle, tmp_potfile = tempfile.mkstemp(
            '.pot', 'tmp', self.locale_dir)
        os.close(tmp_handle)
        # strip the '.pot' suffix: pygettext appends it to the domain name
        potbasename = os.path.basename(tmp_potfile)[:-4]
        pygettext_options = ['-v', '-d', potbasename,
            '-p', os.path.dirname(tmp_potfile)]
        if self.options.ascii_output:
            pygettext_options.insert(0, '-E')
        # pygettext reads its arguments from its own sys.argv
        pygettext.sys.argv = [''] + pygettext_options + source_files
        pygettext.main()
        if not os.path.exists(tmp_potfile):
            raise ProgramError("pygettext failed")
        # make sure the temporary file vanishes even if a later step fails
        atexit.register(silent_os_remove, tmp_potfile)

        if kid_files and self.options.kid_support:
            if not kid:
                print("Kid not installed, no support for Kid templates.")
            else:
                self.scan_kid_files(tmp_potfile, kid_files)

        if genshi_files and self.options.genshi_support:
            try:
                self.scan_genshi_files(tmp_potfile, genshi_files)
            except ImportError:
                print("Genshi not installed, no support for Genshi templates.")

        if js_files and self.options.js_support:
            self.scan_js_files(tmp_potfile, js_files)

        potfile = self.get_potfile_path()
        if os.path.isfile(potfile):
            bakfile = self._replace_ext(potfile, '.bak')
            silent_os_remove(bakfile)
            os.rename(potfile, bakfile)
            print("Backup existing file to %s" % bakfile)
        os.rename(tmp_potfile, potfile)
        print("Message templates written to %s" % potfile)

    def scan_genshi_files(self, potfile, files):
        """Extract strings from Genshi templates and write to pot file.

        Configuration settings:

        `genshi.i18n.ignore_tags` -- `['script', 'style']`
            List of element names. Content inside elements named in this list
            is not extracted as translatable text. Can be a space-separated
            string or a list of strings.
        `genshi.i18n.include_attrs` -- `['abbr', 'alt', 'label', 'prompt',
                'standby', 'summary', 'title']`
            List of attribute names. Only values of the attributes named in
            this list are extracted as translatable text. Can be a
            space-separated string or a list of strings.

        See http://genshi.edgewall.org/wiki/Documentation/0.5.x/i18n.html for
        more information.

        """
        extract_strings = turbogears.i18n.pygettext.pygettext.extract_genshi_strings
        messages = []
        options = {}
        for opt in ('include_attrs', 'ignore_tags'):
            val = config.get('genshi.i18n.' + opt)
            if val:
                options[opt] = val

        for fname in files:
            print("Working on %s" % fname)
            for msg in extract_strings(fname, options or None):
                # Genshi 0.6 adds comments as additional 4th element of msg,
                # so we take care to stay compatible
                lineno, text = msg[0], msg[2]
                if not text:
                    continue
                if isinstance(text, tuple): # e.g. for ngettext
                    for subtext in text:
                        if subtext:
                            messages.append((lineno, fname, subtext))
                else:
                    messages.append((lineno, fname, text))

        self._write_potfile_entries(potfile, messages)

    def scan_kid_files(self, potfile, files):
        """Extract strings from Kid templates and write to pot file.

        Translatable are ``_('...')`` calls in attribute values and text
        nodes, plus plain text nodes without Kid expressions. Elements with
        a ``lang`` attribute and ``script``/``style`` elements are skipped
        together with everything inside them. Templates that cannot be
        parsed are reported and skipped.

        In the written entries the tag name takes the place of the line
        number.

        """
        # XXX: This duplicates functionality already present in
        # turbogears.i18n.pygettext! This should be refactored.
        import sys
        messages = []
        tags_to_ignore = ('script', 'style')
        seen_keys = set()

        def process_text(is_attribute, text, tag, fname):
            key = None
            match = _py_i18n_re.search(text)
            if match:
                key = (match.group(1) or match.group(2) or '').strip()
            elif not is_attribute:
                # we don't have a kid expression in there, so it is
                # "just" a text entry - which we want to be translated!
                import kid.codewriter as cw
                parts = cw.interpolate(text)
                if isinstance(parts, list) and len(parts) > 1:
                    print("Warning: Mixed content in tag <%s>: %s"
                        % (tag, text))
                elif isinstance(parts, basestring):
                    key = text.strip()
            if key and key not in seen_keys and tag not in tags_to_ignore:
                messages.append((tag, fname, key))
                seen_keys.add(key)

        for fname in files:
            print("Working on %s" % fname)
            fh = open(fname)
            try:
                try:
                    tree = kid.document(fh)
                except Exception:
                    # sys.exc_info() keeps this parseable by old and new
                    # Python versions alike.
                    print("Skip %s: %s" % (fname, sys.exc_info()[1]))
                    continue
                sentinel = None
                tag = None
                for ev, el in tree:
                    if ev == kid.parser.START:
                        if sentinel is not None:
                            # inside an ignored subtree: skip descendants
                            # and keep the outermost sentinel in place
                            continue
                        if not isinstance(el.tag, unicode):
                            # skip comments, processing instructions etc.
                            continue
                        if el.get('lang', None) is not None:
                            # if we have a lang-attribute, ignore this
                            # node AND all its descendants.
                            sentinel = el
                            continue
                        # set the tag from the current one,
                        # stripping a leading {namespace}
                        tag = re.sub(r'({[^}]+})?(\w+)', r'\2', el.tag)
                        if tag in tags_to_ignore:
                            # skip JavaScript, CSS etc.
                            sentinel = el
                            continue
                        # process the attribute texts
                        for attrib_text in el.attrib.values():
                            process_text(True, attrib_text, tag, fname)
                    elif ev == kid.parser.END:
                        if el is sentinel:
                            sentinel = None
                    elif ev == kid.parser.TEXT:
                        if sentinel is None and el.strip():
                            process_text(False, el, tag, fname)
            finally:
                fh.close()
        self._write_potfile_entries(potfile, messages)

    def get_strings_in_js(self, fname):
        """Find the strings marked with ``_('...')`` in a JavaScript file.

        The file is read as bytes and every line is decoded with the
        ``--js-encoding`` option before it is searched, so all keys are
        decoded text and match positions are consistent.

        Returns ``(keys, messages)``: `keys` lists the distinct strings in
        order of first appearance, `messages` holds a
        ``(line number, fname, key)`` tuple for each of them, with line
        numbers starting at 1.

        """
        messages = []
        keys = []
        encoding = self.options.js_encoding
        fh = open(fname, 'rb')
        try:
            for index, raw_line in enumerate(fh):
                line = raw_line.decode(encoding)
                for match in _js_i18n_re.finditer(line):
                    key = match.group(1) or match.group(2)
                    if key and key not in keys:
                        messages.append((index + 1, fname, key))
                        keys.append(key)
        finally:
            fh.close()
        return keys, messages

    def scan_js_files(self, potfile, files):
        """Extract strings from JavaScript files and write to pot file."""
        messages = []
        for fname in files:
            print("Working on %s" % fname)
            messages.extend(self.get_strings_in_js(fname)[1])
        self._write_potfile_entries(potfile, messages)

    def _write_potfile_entries(self, potfile, messages):
        """Append ``(location, fname, text)`` entries to the pot file.

        Each entry with a non-empty text is written as a ``#:`` reference
        comment, a ``msgid`` line and an empty ``msgstr``. Nothing is
        written (and the file is not opened) when `messages` is empty.

        """
        if not messages:
            return
        fd = open(potfile, 'at+')
        try:
            for linenumber, fname, text in messages:
                if text:
                    text = catalog.normalize(text.encode('utf-8'))
                    fd.write('#: %s:%s\n' % (fname, linenumber))
                    fd.write('msgid %s\n' % text)
                    fd.write('msgstr ""\n\n')
        finally:
            fd.close()

    def get_potfile_path(self):
        """Return the path of the .pot file inside the locale directory."""
        return os.path.join(self.locale_dir, '%s.pot' % self.domain)

    def get_locale_catalog(self, code):
        """Return the path of the .po catalog for the locale `code`."""
        return os.path.join(self.locale_dir, code, 'LC_MESSAGES',
            '%s.po' % self.domain)

    def list_message_catalogs(self):
        """Return the paths of all existing .po catalogs of the domain.

        Every directory directly below the locale directory is checked for
        a ``LC_MESSAGES/<domain>.po`` file.

        """
        files = []
        for name in glob.glob(self.locale_dir + '/*'):
            if not os.path.isdir(name):
                continue
            fname = os.path.join(name, 'LC_MESSAGES', '%s.po' % self.domain)
            if os.path.isfile(fname):
                files.append(fname)
        return files

    def fix_tzinfo(self, potfile):
        """Fix tzinfo.

        In certain environments, tzinfo as formatted by strftime() is not
        utf-8, e.g. Windows XP with Russian MUL.

        This leads to a later error when a program tries to read the
        catalog. The time zone part of the POT-Creation-Date header is
        therefore cut off when it cannot be decoded as UTF-8; the file is
        rewritten in place.

        """
        fh = open(potfile, 'rb')
        try:
            data = fh.read()
        finally:
            fh.close()

        def repl(m):
            """Remove tzinfo if it breaks encoding."""
            tzinfo = m.group(2)
            try:
                tzinfo.decode('utf-8')
            except UnicodeDecodeError:
                return m.group(1) # cut tz info
            return m.group(0) # leave unchanged

        data = re.sub(
            r"(POT-Creation-Date: [\d-]+ [0-9:]+)\+([^\\]+)", repl, data)
        fh = open(potfile, 'wb')
        try:
            fh.write(data)
        finally:
            fh.close()


def main():
    """Run the i18n tool as a stand-alone script.

    The tool's constructor requires the version string shown by
    ``--version``; the TurboGears version is passed for it.

    """
    tool = InternationalizationTool(turbogears.__version__)
    tool.run()


if __name__ == '__main__':
    main()