#!/usr/bin/python
# dedup_json.py
#
# Consolidates duplicate key-value pairs in a JSON file.
# If the same key is used with different values, no warning is given,
# and there is no guarantee about which key-value pair will be output.
# There is also no guarantee as to the order of the key-value pairs
# output.
#
# Copyright 2013 Google Inc.
# https://developers.google.com/blockly/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  22. import argparse
  23. import codecs
  24. import json
  25. from common import InputError
  26. def main():
  27. """Parses arguments and iterates over files.
  28. Raises:
  29. IOError: An I/O error occurred with an input or output file.
  30. InputError: Input JSON could not be parsed.
  31. """
  32. # Set up argument parser.
  33. parser = argparse.ArgumentParser(
  34. description='Removes duplicate key-value pairs from JSON files.')
  35. parser.add_argument('--suffix', default='',
  36. help='optional suffix for output files; '
  37. 'if empty, files will be changed in place')
  38. parser.add_argument('files', nargs='+', help='input files')
  39. args = parser.parse_args()
  40. # Iterate over files.
  41. for filename in args.files:
  42. # Read in json using Python libraries. This eliminates duplicates.
  43. print('Processing ' + filename + '...')
  44. try:
  45. with codecs.open(filename, 'r', 'utf-8') as infile:
  46. j = json.load(infile)
  47. except ValueError, e:
  48. print('Error reading ' + filename)
  49. raise InputError(file, str(e))
  50. # Built up output strings as an array to make output of delimiters easier.
  51. output = []
  52. for key in j:
  53. if key != '@metadata':
  54. output.append('\t"' + key + '": "' +
  55. j[key].replace('\n', '\\n') + '"')
  56. # Output results.
  57. with codecs.open(filename + args.suffix, 'w', 'utf-8') as outfile:
  58. outfile.write('{\n')
  59. outfile.write(',\n'.join(output))
  60. outfile.write('\n}\n')
  61. if __name__ == '__main__':
  62. main()