format.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. // Copyright 2006 The Closure Library Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS-IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /**
  15. * @fileoverview Provides utility functions for formatting strings, numbers etc.
  16. *
  17. */
  18. goog.provide('goog.format');
  19. goog.require('goog.i18n.GraphemeBreak');
  20. goog.require('goog.string');
  21. goog.require('goog.userAgent');
  /**
   * Renders a byte count in a compact, human readable form such as
   * 54, 450K, 1.3M or 5G. Scaling is binary (powers of 1024) and the trailing
   * 'B' unit marker is never included.
   * @param {number} bytes The number of bytes to show.
   * @param {number=} opt_decimals The number of decimals to use. Defaults to 2.
   * @return {string} The human readable form of the byte size.
   */
  goog.format.fileSize = function(bytes, opt_decimals) {
    // File sizes are shown without the 'B' suffix that numBytesToString adds
    // by default.
    var includeByteSuffix = false;
    return goog.format.numBytesToString(bytes, opt_decimals, includeByteSuffix);
  };
  /**
   * Tests whether a string is a number optionally followed by a scaling unit
   * (K, M, G, T, P, k, m, u, n) and an optional trailing 'B', i.e. whether it
   * matches the pattern the string-to-number converters in this file accept.
   *
   * If the number has a decimal point, at least one digit must precede it.
   * A leading minus sign is allowed.
   *
   * Examples:
   * 0, 1, 1.0, 10.4K, 2.3M, -0.3P, 1.2m
   *
   * @param {string} val String value to check.
   * @return {boolean} True if string could be converted to a numeric value.
   */
  goog.format.isConvertableScaledNumber = function(val) {
    var scaledNumberPattern = goog.format.SCALED_NUMERIC_RE_;
    return scaledNumberPattern.test(val);
  };
  /**
   * Parses a scaled-number string (e.g. '10.4K', '2MB') into a number.
   * A trailing 'B' selects the binary (1024-based) scale table; any other
   * string is interpreted with the SI (1000-based) table.
   * @param {string} stringValue String to be converted to numeric value.
   * @return {number} Numeric value for string, or NaN if the string does not
   *     have the expected format.
   */
  goog.format.stringToNumericValue = function(stringValue) {
    var scales = goog.string.endsWith(stringValue, 'B') ?
        goog.format.NUMERIC_SCALES_BINARY_ :
        goog.format.NUMERIC_SCALES_SI_;
    return goog.format.stringToNumericValue_(stringValue, scales);
  };
  /**
   * Parses a scaled-number string into a byte count. The binary (1024-based)
   * scale table is always used, whether or not the string ends in 'B'.
   * @param {string} stringValue String to be converted to numeric value.
   * @return {number} Numeric value for string, or NaN if the string does not
   *     have the expected format.
   */
  goog.format.stringToNumBytes = function(stringValue) {
    var binaryScales = goog.format.NUMERIC_SCALES_BINARY_;
    return goog.format.stringToNumericValue_(stringValue, binaryScales);
  };
  /**
   * Formats a number as a scaled string using the SI (1000-based) scale table,
   * without any suffix or separator.
   * @param {number} val Value to be converted.
   * @param {number=} opt_decimals The number of decimals to use. Defaults to 2.
   * @return {string} String representation of number.
   */
  goog.format.numericValueToString = function(val, opt_decimals) {
    var siScales = goog.format.NUMERIC_SCALES_SI_;
    return goog.format.numericValueToString_(val, siScales, opt_decimals);
  };
  /**
   * Formats a byte count as a scaled string using the binary (1024-based)
   * scale table.
   *
   * Unless disabled, a 'B' is appended to the scale symbol (e.g. '10.5KB') to
   * reduce confusion with counts scaled by powers of 1000. Values that are
   * rendered without a scale symbol never receive the suffix.
   * @param {number} val Value to be converted.
   * @param {number=} opt_decimals The number of decimals to use. Defaults to 2.
   * @param {boolean=} opt_suffix If true, include trailing 'B' in returned
   *     string. Default is true.
   * @param {boolean=} opt_useSeparator If true, a separator is placed between
   *     the number and the scale symbol. Default is false.
   * @return {string} String representation of number of bytes.
   */
  goog.format.numBytesToString = function(
      val, opt_decimals, opt_suffix, opt_useSeparator) {
    // An omitted opt_suffix counts as true.
    var wantsSuffix = !goog.isDef(opt_suffix) || opt_suffix;
    var suffix = wantsSuffix ? 'B' : '';
    return goog.format.numericValueToString_(
        val, goog.format.NUMERIC_SCALES_BINARY_, opt_decimals, suffix,
        opt_useSeparator);
  };
  /**
   * Parses a scaled-number string against a given table of scale factors.
   * @param {string} stringValue String to be converted to numeric value.
   * @param {Object} conversion Dictionary mapping a unit symbol (including the
   *     empty string for "no unit") to its multiplier.
   * @return {number} Numeric value for string. If it cannot be converted,
   *     returns NaN.
   * @private
   */
  goog.format.stringToNumericValue_ = function(stringValue, conversion) {
    var parts = stringValue.match(goog.format.SCALED_NUMERIC_RE_);
    if (parts) {
      // Group 1 is the signed decimal number, group 2 the (possibly empty)
      // unit symbol. A trailing 'B' is matched but not captured.
      var magnitude = Number(parts[1]);
      var multiplier = conversion[parts[2]];
      return magnitude * multiplier;
    }
    return NaN;
  };
  /**
   * Formats a number as a scaled string using the given table of scale
   * factors. The largest unit that fits the magnitude of the value is chosen;
   * the value is divided by that unit's factor and rounded to the requested
   * number of decimals.
   * @param {number} val Value to be converted.
   * @param {Object} conversion Dictionary of scaling factors.
   * @param {number=} opt_decimals The number of decimals to use. Default is 2.
   * @param {string=} opt_suffix Optional suffix to append. Only applied when a
   *     scale symbol is emitted.
   * @param {boolean=} opt_useSeparator If true, number and scale will be
   *     separated by a space. Default is false. Only applied when a scale
   *     symbol is emitted.
   * @return {string} The human readable form of the value.
   * @private
   */
  goog.format.numericValueToString_ = function(
      val, conversion, opt_decimals, opt_suffix, opt_useSeparator) {
    var units = goog.format.NUMERIC_SCALE_PREFIXES_;
    // Units are picked by magnitude; the sign is kept by dividing the
    // original value below.
    var magnitude = val < 0 ? -val : val;

    // Walk the units from largest to smallest and stop at the first that fits.
    var unitSymbol = '';
    for (var idx = 0; idx < units.length; idx++) {
      var candidate = units[idx];
      var factor = conversion[candidate];
      // For factors of at most 1 a value only needs to exceed a tenth of the
      // factor, so that 0.5 is shown as "0.5" rather than "500m".
      if (magnitude >= factor || (factor <= 1 && magnitude > 0.1 * factor)) {
        unitSymbol = candidate;
        break;
      }
    }

    var divisor = 1;
    var separator = '';
    var label = unitSymbol;
    if (unitSymbol) {
      divisor = conversion[unitSymbol];
      if (opt_suffix) {
        label += opt_suffix;
      }
      if (opt_useSeparator) {
        separator = ' ';
      }
    }

    var decimals = goog.isDef(opt_decimals) ? opt_decimals : 2;
    var ex = Math.pow(10, decimals);
    // The rounded result is a number, so trailing zeros are not padded.
    return Math.round(val / divisor * ex) / ex + separator + label;
  };
  /**
   * Regular expression for detecting scaling units, such as K, M, G, etc. for
   * converting a string representation to a numeric value.
   *
   * Capture group 1 is the number: an optional minus sign, one or more digits,
   * and an optional decimal point followed by zero or more digits. Capture
   * group 2 is the optional unit symbol.
   *
   * Also allow 'k' to be aliased to 'K'. These could be used for SI (powers
   * of 1000) or Binary (powers of 1024) conversions.
   *
   * Also allow final 'B' to be interpreted as byte-count, implicitly
   * triggering binary conversion (e.g., '10.2MB').
   *
   * The unit character class lists only the unit letters. Inside a character
   * class a comma is a literal comma, so a comma-separated list would also
   * accept strings such as '10,' that have no corresponding scale factor.
   *
   * @type {RegExp}
   * @private
   */
  goog.format.SCALED_NUMERIC_RE_ = /^([-]?\d+\.?\d*)([KMGTPkmun]?)[B]?$/;
  /**
   * Scaling prefixes ordered from the largest scale to the smallest. The
   * number-to-string conversion walks this list front to back and uses the
   * first prefix that fits.
   * @private {Array<string>}
   */
  goog.format.NUMERIC_SCALE_PREFIXES_ = [
    'P', 'T', 'G', 'M', 'K',  // Multiples, largest first.
    '',                       // No scaling.
    'm', 'u', 'n'             // Fractions, largest first.
  ];
  /**
   * SI (powers of 1000) multiplier for each unit symbol. The empty string is
   * the "no unit" entry; 'k' is accepted as an alias of 'K'.
   * @type {Object}
   * @private
   */
  goog.format.NUMERIC_SCALES_SI_ = {
    '': 1,      // no unit
    'n': 1e-9,  // nano
    'u': 1e-6,  // micro
    'm': 1e-3,  // milli
    'k': 1e3,   // kilo (lower-case alias)
    'K': 1e3,   // kilo
    'M': 1e6,   // mega
    'G': 1e9,   // giga
    'T': 1e12,  // tera
    'P': 1e15   // peta
  };
  /**
   * Binary (powers of 1024) multiplier for each unit symbol. The empty string
   * is the "no unit" entry; 'k' is accepted as an alias of 'K'. Every value is
   * an exact power of two.
   * @type {Object}
   * @private
   */
  goog.format.NUMERIC_SCALES_BINARY_ = {
    '': 1,
    'n': 1 / (1024 * 1024 * 1024),  // 1024^-3
    'u': 1 / (1024 * 1024),         // 1024^-2
    'm': 1 / 1024,                  // 1024^-1
    'k': 1024,
    'K': 1024,
    'M': 1024 * 1024,                      // 1024^2
    'G': 1024 * 1024 * 1024,               // 1024^3
    'T': 1024 * 1024 * 1024 * 1024,        // 1024^4
    'P': 1024 * 1024 * 1024 * 1024 * 1024  // 1024^5
  };
  /**
   * First Unicode code point that has the Mark property (U+0300). Character
   * codes below this value are never treated as potentially grapheme
   * extending when inserting word breaks.
   * @type {number}
   * @private
   */
  goog.format.FIRST_GRAPHEME_EXTEND_ = 0x0300;
  /**
   * Decides whether a character acts as a breaking space for the purpose of
   * word-break insertion. This covers every character code up to and
   * including the ASCII space (so all ASCII control characters), the main
   * Unicode range of spacing characters (U+2000 to U+200B inclusive, except
   * U+2007), and a handful of other Unicode space and separator characters.
   * @param {number} charCode The character code under consideration.
   * @return {boolean} True if the character is a breaking space.
   * @private
   */
  goog.format.isTreatedAsBreakingSpace_ = function(charCode) {
    // ASCII space and everything below it.
    if (charCode <= goog.format.WbrToken_.SPACE) {
      return true;
    }
    // None of the remaining candidates lies below U+1000, so the common case
    // is rejected with a single comparison.
    if (!(charCode >= 0x1000)) {
      return false;
    }
    // U+2000..U+200B, skipping U+2007.
    var inMainSpacingRange = (charCode >= 0x2000 && charCode <= 0x2006) ||
        (charCode >= 0x2008 && charCode <= 0x200B);
    if (inMainSpacingRange) {
      return true;
    }
    // Individual space and separator characters outside that range.
    return charCode == 0x1680 || charCode == 0x180E || charCode == 0x2028 ||
        charCode == 0x2029 || charCode == 0x205f || charCode == 0x3000;
  };
  /**
   * Decides whether a character is an invisible formatting character, i.e.
   * one in the range U+200C to U+200F or U+202A to U+202E. Such characters do
   * not count towards the length of a token when inserting word breaks.
   * @param {number} charCode The character code under consideration.
   * @return {boolean} True if the character is an invisible formatting
   *     character.
   * @private
   */
  goog.format.isInvisibleFormattingCharacter_ = function(charCode) {
    // See: http://unicode.org/charts/PDF/U2000.pdf
    var inFirstRange = charCode >= 0x200C && charCode <= 0x200F;
    var inSecondRange = charCode >= 0x202A && charCode <= 0x202E;
    return inFirstRange || inSecondRange;
  };
  /**
   * Inserts word breaks into an HTML string so that no run of visible
   * characters grows beyond a given length without a break opportunity.
   *
   * The run length is reset when a space (or a character that behaves like a
   * space) is seen and is not incremented for invisible formatting
   * characters. Breaks are never placed inside HTML tags or entities: a tag
   * contributes nothing to the run length, an entity counts as one character.
   *
   * The input is scanned by character code and copied to the output only in
   * substrings between inserted breaks.
   *
   * @param {string} str HTML to insert word breaks into.
   * @param {function(number, number, boolean): boolean} hasGraphemeBreak A
   *     function determining if there is a grapheme break between two
   *     characters, in the same signature as
   *     goog.i18n.GraphemeBreak.hasGraphemeBreak.
   * @param {number=} opt_maxlen Maximum length after which to ensure
   *     there is a break. Default is 10 characters.
   * @return {string} The string including word breaks.
   * @private
   */
  goog.format.insertWordBreaksGeneric_ = function(
      str, hasGraphemeBreak, opt_maxlen) {
    var limit = opt_maxlen || 10;
    // A string shorter than the limit can never need a break.
    if (limit > str.length) {
      return str;
    }

    var tokens = goog.format.WbrToken_;
    var pieces = [];
    // Length of the current run of visible characters.
    var runLength = 0;
    // Character code of the '<' or '&' that opened the tag or entity we are
    // currently inside; zero when outside of both.
    var openDelimiter = 0;
    // Index of the first input character not yet copied to the output.
    var flushedUpTo = 0;
    var current = 0;

    for (var pos = 0; pos < str.length; pos++) {
      // Working on character codes avoids allocating one-character strings.
      var previous = current;
      current = str.charCodeAt(pos);

      // Never break in front of a character that may extend the previous
      // grapheme.
      var mayExtendGrapheme =
          current >= goog.format.FIRST_GRAPHEME_EXTEND_ &&
          !hasGraphemeBreak(previous, current, true);

      // Break once the run is long enough, but not right before a space: the
      // end of a word needs no extra break opportunity.
      if (runLength >= limit &&
          !goog.format.isTreatedAsBreakingSpace_(current) &&
          !mayExtendGrapheme) {
        pieces.push(
            str.substring(flushedUpTo, pos), goog.format.WORD_BREAK_HTML);
        flushedUpTo = pos;
        runLength = 0;
      }

      if (openDelimiter) {
        // Inside a tag or entity: only the matching terminator matters.
        if (current == tokens.GT && openDelimiter == tokens.LT) {
          // End of an HTML tag, which counts as zero characters.
          openDelimiter = 0;
        } else if (
            current == tokens.SEMI_COLON && openDelimiter == tokens.AMP) {
          // End of an HTML entity, which counts as one character.
          openDelimiter = 0;
          runLength++;
        }
        continue;
      }

      if (current == tokens.LT || current == tokens.AMP) {
        // Start of a tag ('<') or an entity ('&').
        openDelimiter = current;
      } else if (goog.format.isTreatedAsBreakingSpace_(current)) {
        // Space or control character: a new run starts here.
        runLength = 0;
      } else if (!goog.format.isInvisibleFormattingCharacter_(current)) {
        // Ordinary character. Grapheme extending characters are counted too,
        // although they are not strictly new characters: a conservative break
        // detector would otherwise keep us from ever breaking, even at
        // punctuation. With a full detector the only cost is an occasional
        // slightly early break.
        runLength++;
      }
    }

    // Copy whatever is left after the last inserted break.
    pieces.push(str.substring(flushedUpTo));
    return pieces.join('');
  };
  /**
   * Inserts word breaks into an HTML string at a given interval.
   *
   * Break positions are decided with goog.i18n.GraphemeBreak, which makes this
   * variant as aggressive as possible: it relies on a full table of Unicode
   * characters where it is legal to insert word breaks. That table comes at a
   * 2.5k pre-gzip (~1k post-gzip) size cost; consider insertWordBreaksBasic to
   * minimize the size impact.
   *
   * @param {string} str HTML to insert word breaks into.
   * @param {number=} opt_maxlen Maximum length after which to ensure there is
   *     a break. Default is 10 characters.
   * @return {string} The string including word breaks.
   * @deprecated Prefer wrapping with CSS word-wrap: break-word.
   */
  goog.format.insertWordBreaks = function(str, opt_maxlen) {
    var breakDetector = goog.i18n.GraphemeBreak.hasGraphemeBreak;
    return goog.format.insertWordBreaksGeneric_(str, breakDetector, opt_maxlen);
  };
  /**
   * Conservative stand-in for goog.i18n.GraphemeBreak.hasGraphemeBreak.
   *
   * It has a compatible signature but only looks at the current character and
   * reports a grapheme break for a single range of character codes that is
   * simple to account for; everything else yields false.
   *
   * @param {number} lastCharCode The previous character code. Ignored.
   * @param {number} charCode The character code under consideration. It must
   *     be at least \u0300 as a precondition -- this case is covered by
   *     insertWordBreaksGeneric_.
   * @param {boolean=} opt_extended Ignored, to conform with the interface.
   * @return {boolean} Whether it is one of the recognized subsets of
   *     characters with a grapheme break.
   * @private
   */
  goog.format.conservativelyHasGraphemeBreak_ = function(
      lastCharCode, charCode, opt_extended) {
    // Only the most common Cyrillic characters are recognized. Latin
    // characters need no handling here because insertWordBreaksGeneric_
    // already deals with them itself.
    // TODO(gboyer): Also account for Greek, Armenian, and Georgian if it is
    // simple to do so.
    var firstRecognized = 0x400;
    var endRecognizedExclusive = 0x523;
    return charCode >= firstRecognized && charCode < endRecognizedExclusive;
  };
  387. // TODO(gboyer): Consider using a compile-time flag to switch implementations
  388. // rather than relying on the developers to toggle implementations.
  /**
   * Inserts word breaks into an HTML string at a given interval.
   *
   * Compared to insertWordBreaks this variant is less aggressive: breaks are
   * only inserted next to punctuation and between Latin or Cyrillic
   * characters. That is good enough for the common case of URLs and works for
   * all Latin and Cyrillic languages, and CJK has no need for word breaks.
   * When this method is used, goog.i18n.GraphemeBreak may be dead code
   * eliminated.
   *
   * @param {string} str HTML to insert word breaks into.
   * @param {number=} opt_maxlen Maximum length after which to ensure there is
   *     a break. Default is 10 characters.
   * @return {string} The string including word breaks.
   * @deprecated Prefer wrapping with CSS word-wrap: break-word.
   */
  goog.format.insertWordBreaksBasic = function(str, opt_maxlen) {
    var breakDetector = goog.format.conservativelyHasGraphemeBreak_;
    return goog.format.insertWordBreaksGeneric_(str, breakDetector, opt_maxlen);
  };
  /**
   * Whether the current user agent is Internet Explorer at version 8 or
   * higher. The version is only queried when the user agent is IE.
   * @type {boolean}
   * @private
   */
  goog.format.IS_IE8_OR_ABOVE_ = (
      goog.userAgent.IE &&
      goog.userAgent.isVersionOrHigher(8));
  /**
   * The markup inserted by insertWordBreaks at each break opportunity. It is
   * chosen once, from the detected user agent:
   * Safari requires <wbr></wbr>, Opera needs the &shy; entity (though this
   * will give a visible hyphen at breaks), IE8 uses a zero width space, and
   * other browsers just use <wbr>.
   * @type {string}
   */
  goog.format.WORD_BREAK_HTML = (function() {
    // Checked in priority order; the first matching user agent wins.
    if (goog.userAgent.WEBKIT) {
      return '<wbr></wbr>';
    }
    if (goog.userAgent.OPERA) {
      return '&shy;';
    }
    if (goog.format.IS_IE8_OR_ABOVE_) {
      return '&#8203;';
    }
    return '<wbr>';
  })();
  /**
   * Character codes of the ASCII characters that insertWordBreaks treats
   * specially: the delimiters of HTML tags and entities, and the space.
   * @private
   * @enum {number}
   */
  goog.format.WbrToken_ = {
    LT: 0x3C,          // '<' opens an HTML tag
    GT: 0x3E,          // '>' closes an HTML tag
    AMP: 0x26,         // '&' opens an HTML entity
    SEMI_COLON: 0x3B,  // ';' closes an HTML entity
    SPACE: 0x20        // ' '
  };