charlistdecompressor.js 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // Copyright 2009 The Closure Library Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS-IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  /**
   * @fileoverview The decompressor for Base88 compressed character lists.
   *
   * The compression is by base 88 encoding the delta between two adjacent
   * characters in this list. The deltas can be positive or negative. Also, there
   * would be character ranges. These three types of values
   * are given enum values 0, 1 and 2 respectively. Initial 3 bits are used for
   * encoding the type and total length of the encoded value. Length enums 0, 1
   * and 2 represent lengths 1, 2 and 4. So (value * 8 + type * 3 + length enum)
   * is encoded in base 88 by the following characters for numbers from 0 to 87:
   * 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ (continued in next line)
   * abcdefghijklmnopqrstuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~
   *
   * Value uses 0 based counting. That is, the value for the range [a, b] is 0
   * and that of [a, c] is 1. Similarly, the delta of "ab" is 0.
   *
   * The following python script can be used to compress character lists taken
   * from standard input: http://go/charlistcompressor.py
   *
   */
  34. goog.provide('goog.i18n.CharListDecompressor');
  35. goog.require('goog.array');
  36. goog.require('goog.i18n.uChar');
  /**
   * Decompressor for character lists that were compressed with the base88
   * scheme. Constructing an instance builds the character lookup table used
   * by the decoding methods.
   * @constructor
   * @final
   */
  goog.i18n.CharListDecompressor = function() {
    // Encoding alphabet: buildCharMap_ maps each character to its index in
    // this string.
    var alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr' +
        'stuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~';
    this.buildCharMap_(alphabet);
  };


  /**
   * Lookup table from each ascii character of the encoding alphabet to the
   * integer it represents. The prototype value is null; buildCharMap_ assigns
   * the populated table on the instance.
   * @type {Object}
   * @private
   */
  goog.i18n.CharListDecompressor.prototype.charMap_ = null;
  /**
   * Populates the lookup table that maps every character of the given
   * alphabet to its position within that alphabet. Does nothing when a table
   * is already present.
   * @param {string} str The string of characters used in base88 scheme.
   * @private
   */
  goog.i18n.CharListDecompressor.prototype.buildCharMap_ = function(str) {
    if (this.charMap_) {
      return;
    }
    var table = {};
    this.charMap_ = table;
    for (var pos = 0; pos < str.length; pos++) {
      // A character's numeric value is simply its index in the alphabet.
      table[str.charAt(pos)] = pos;
    }
  };
  /**
   * Decodes the base 88 number spelled by a run of characters inside a string.
   * The first character of the run is the least significant digit.
   * @param {string} str String containing sequence of characters encoding a
   *     number in base 88 scheme.
   * @param {number} start Starting position of substring encoding the number.
   * @param {number} leng Length of the substring encoding the number.
   * @return {number} The encoded number.
   * @private
   */
  goog.i18n.CharListDecompressor.prototype.getCodeAt_ = function(
      str, start, leng) {
    var total = 0;
    var offset = 0;
    while (offset < leng) {
      var digit = this.charMap_[str.charAt(start + offset)];
      // Digit at position `offset` carries the weight 88^offset.
      total += digit * Math.pow(88, offset);
      offset++;
    }
    return total;
  };
  /**
   * Appends the character(s) described by a decoded (value, type) pair to the
   * list and reports the codepoint that was appended last.
   *
   * Type 0 moves forward by value + 1 and appends one character, type 1 moves
   * backward by value + 1 and appends one character, and type 2 appends the
   * value + 1 codepoints that follow the last one. Any other type leaves the
   * list untouched.
   * @param {Array<string>} list The list of characters to which the specified
   *     characters are appended.
   * @param {number} lastcode The last codepoint that was added to the list.
   * @param {number} value The value component representing the delta or
   *     range.
   * @param {number} type The type component representing whether the value
   *     is a positive or negative delta or range.
   * @return {number} Last codepoint that is added to the list.
   * @private
   */
  goog.i18n.CharListDecompressor.prototype.addChars_ = function(
      list, lastcode, value, type) {
    var appendCodePoint = function(codePoint) {
      goog.array.extend(list, goog.i18n.uChar.fromCharCode(codePoint));
    };

    if (type == 2) {
      // Range: emit each successive codepoint, value + 1 of them in total.
      for (var step = 0; step <= value; step++) {
        lastcode++;
        appendCodePoint(lastcode);
      }
      return lastcode;
    }

    if (type == 0) {
      // Positive delta (0-based, hence the + 1).
      lastcode += value + 1;
    } else if (type == 1) {
      // Negative delta (0-based, hence the + 1).
      lastcode -= value + 1;
    } else {
      // Unrecognised type: nothing is appended.
      return lastcode;
    }
    appendCodePoint(lastcode);
    return lastcode;
  };
  /**
   * Expands a base 88 compressed string into the list of characters it
   * encodes.
   *
   * NOTE(review): a character that is missing from the lookup table yields a
   * NaN width, which ends the loop without reporting an error; confirm that
   * silently truncating malformed input is acceptable.
   * @param {string} str The string encoding character list.
   * @return {!Array<string>} The list of characters specified by the given
   *     string in base 88 scheme.
   */
  goog.i18n.CharListDecompressor.prototype.toCharList = function(str) {
    var META_RADIX = 8;
    var chars = [];
    var previousCode = 0;
    for (var pos = 0; pos < str.length;) {
      // 88 is a multiple of 8, so the first character of an entry is enough
      // to recover the metadata (entry type and length enum).
      var header = this.charMap_[str.charAt(pos)] % META_RADIX;
      var kind = Math.floor(header / 3);
      var lengthEnum = header % 3;
      // Length enums 0, 1 and 2 stand for widths of 1, 2 and 4 characters.
      var width = lengthEnum == 2 ? 4 : lengthEnum + 1;
      var payload = Math.floor(this.getCodeAt_(str, pos, width) / META_RADIX);
      previousCode = this.addChars_(chars, previousCode, payload, kind);
      pos += width;
    }
    return chars;
  };