xliff_to_json.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
#!/usr/bin/python
# Converts .xlf files into .json files for use at http://translatewiki.net.
#
# Copyright 2013 Google Inc.
# https://developers.google.com/blockly/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  18. import argparse
  19. import os
  20. import re
  21. import subprocess
  22. import sys
  23. from xml.dom import minidom
  24. from common import InputError
  25. from common import write_files
  26. # Global variables
  27. args = None # Parsed command-line arguments.
  28. def _parse_trans_unit(trans_unit):
  29. """Converts a trans-unit XML node into a more convenient dictionary format.
  30. Args:
  31. trans_unit: An XML representation of a .xlf translation unit.
  32. Returns:
  33. A dictionary with useful information about the translation unit.
  34. The returned dictionary is guaranteed to have an entry for 'key' and
  35. may have entries for 'source', 'target', 'description', and 'meaning'
  36. if present in the argument.
  37. Raises:
  38. InputError: A required field was not present.
  39. """
  40. def get_value(tag_name):
  41. elts = trans_unit.getElementsByTagName(tag_name)
  42. if not elts:
  43. return None
  44. elif len(elts) == 1:
  45. return ''.join([child.toxml() for child in elts[0].childNodes])
  46. else:
  47. raise InputError('', 'Unable to extract ' + tag_name)
  48. result = {}
  49. key = trans_unit.getAttribute('id')
  50. if not key:
  51. raise InputError('', 'id attribute not found')
  52. result['key'] = key
  53. # Get source and target, if present.
  54. try:
  55. result['source'] = get_value('source')
  56. result['target'] = get_value('target')
  57. except InputError, e:
  58. raise InputError(key, e.msg)
  59. # Get notes, using the from value as key and the data as value.
  60. notes = trans_unit.getElementsByTagName('note')
  61. for note in notes:
  62. from_value = note.getAttribute('from')
  63. if from_value and len(note.childNodes) == 1:
  64. result[from_value] = note.childNodes[0].data
  65. else:
  66. raise InputError(key, 'Unable to extract ' + from_value)
  67. return result
  68. def _process_file(filename):
  69. """Builds list of translation units from input file.
  70. Each translation unit in the input file includes:
  71. - an id (opaquely generated by Soy)
  72. - the Blockly name for the message
  73. - the text in the source language (generally English)
  74. - a description for the translator
  75. The Soy and Blockly ids are joined with a hyphen and serve as the
  76. keys in both output files. The value is the corresponding text (in the
  77. <lang>.json file) or the description (in the qqq.json file).
  78. Args:
  79. filename: The name of an .xlf file produced by Closure.
  80. Raises:
  81. IOError: An I/O error occurred with an input or output file.
  82. InputError: The input file could not be parsed or lacked required
  83. fields.
  84. Returns:
  85. A list of dictionaries produced by parse_trans_unit().
  86. """
  87. try:
  88. results = [] # list of dictionaries (return value)
  89. names = [] # list of names of encountered keys (local variable)
  90. try:
  91. parsed_xml = minidom.parse(filename)
  92. except IOError:
  93. # Don't get caught by below handler
  94. raise
  95. except Exception, e:
  96. print
  97. raise InputError(filename, str(e))
  98. # Make sure needed fields are present and non-empty.
  99. for trans_unit in parsed_xml.getElementsByTagName('trans-unit'):
  100. unit = _parse_trans_unit(trans_unit)
  101. for key in ['description', 'meaning', 'source']:
  102. if not key in unit or not unit[key]:
  103. raise InputError(filename + ':' + unit['key'],
  104. key + ' not found')
  105. if unit['description'].lower() == 'ibid':
  106. if unit['meaning'] not in names:
  107. # If the term has not already been described, the use of 'ibid'
  108. # is an error.
  109. raise InputError(
  110. filename,
  111. 'First encountered definition of: ' + unit['meaning']
  112. + ' has definition: ' + unit['description']
  113. + '. This error can occur if the definition was not'
  114. + ' provided on the first appearance of the message'
  115. + ' or if the source (English-language) messages differ.')
  116. else:
  117. # If term has already been described, 'ibid' was used correctly,
  118. # and we output nothing.
  119. pass
  120. else:
  121. if unit['meaning'] in names:
  122. raise InputError(filename,
  123. 'Second definition of: ' + unit['meaning'])
  124. names.append(unit['meaning'])
  125. results.append(unit)
  126. return results
  127. except IOError, e:
  128. print 'Error with file {0}: {1}'.format(filename, e.strerror)
  129. sys.exit(1)
  130. def sort_units(units, templates):
  131. """Sorts the translation units by their definition order in the template.
  132. Args:
  133. units: A list of dictionaries produced by parse_trans_unit()
  134. that have a non-empty value for the key 'meaning'.
  135. templates: A string containing the Soy templates in which each of
  136. the units' meanings is defined.
  137. Returns:
  138. A new list of translation units, sorted by the order in which
  139. their meaning is defined in the templates.
  140. Raises:
  141. InputError: If a meaning definition cannot be found in the
  142. templates.
  143. """
  144. def key_function(unit):
  145. match = re.search(
  146. '\\smeaning\\s*=\\s*"{0}"\\s'.format(unit['meaning']),
  147. templates)
  148. if match:
  149. return match.start()
  150. else:
  151. raise InputError(args.templates,
  152. 'msg definition for meaning not found: ' +
  153. unit['meaning'])
  154. return sorted(units, key=key_function)
  155. def main():
  156. """Parses arguments and processes the specified file.
  157. Raises:
  158. IOError: An I/O error occurred with an input or output file.
  159. InputError: Input files lacked required fields.
  160. """
  161. # Set up argument parser.
  162. parser = argparse.ArgumentParser(description='Create translation files.')
  163. parser.add_argument(
  164. '--author',
  165. default='Ellen Spertus <ellen.spertus@gmail.com>',
  166. help='name and email address of contact for translators')
  167. parser.add_argument('--lang', default='en',
  168. help='ISO 639-1 source language code')
  169. parser.add_argument('--output_dir', default='json',
  170. help='relative directory for output files')
  171. parser.add_argument('--xlf', help='file containing xlf definitions')
  172. parser.add_argument('--templates', default=['template.soy'], nargs='+',
  173. help='relative path to Soy templates, comma or space '
  174. 'separated (used for ordering messages)')
  175. global args
  176. args = parser.parse_args()
  177. # Make sure output_dir ends with slash.
  178. if (not args.output_dir.endswith(os.path.sep)):
  179. args.output_dir += os.path.sep
  180. # Process the input file, and sort the entries.
  181. units = _process_file(args.xlf)
  182. files = []
  183. for arg in args.templates:
  184. for filename in arg.split(','):
  185. filename = filename.strip();
  186. if filename:
  187. with open(filename) as myfile:
  188. files.append(' '.join(line.strip() for line in myfile))
  189. sorted_units = sort_units(units, ' '.join(files))
  190. # Write the output files.
  191. write_files(args.author, args.lang, args.output_dir, sorted_units, True)
  192. # Delete the input .xlf file.
  193. os.remove(args.xlf)
  194. print('Removed ' + args.xlf)
  195. if __name__ == '__main__':
  196. main()