linkify.js 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. // Copyright 2008 The Closure Library Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS-IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /**
  15. * @fileoverview Utility function for linkifying text.
  16. * @author bolinfest@google.com (Michael Bolin)
  17. */
  18. goog.provide('goog.string.linkify');
  19. goog.require('goog.html.SafeHtml');
  20. goog.require('goog.string');
  /**
   * Takes a string of plain text and linkifies URLs and email addresses. For a
   * URL (unless opt_attributes is specified), the target of the link will be
   * _blank and it will have a rel=nofollow attribute applied to it so that links
   * created by linkify will not be of interest to search engines.
   * @param {string} text Plain text.
   * @param {!Object<string, ?goog.html.SafeHtml.AttributeValue>=} opt_attributes
   *     Attributes to add to all links created. Default are rel=nofollow and
   *     target=_blank. To clear those default attributes set rel='' and
   *     target=''.
   * @param {boolean=} opt_preserveNewlines Whether to preserve newlines with
   *     &lt;br&gt;.
   * @return {!goog.html.SafeHtml} Linkified HTML. Any text that is not part of a
   *     link will be HTML-escaped.
   */
  goog.string.linkify.linkifyPlainTextAsHtml = function(
      text, opt_attributes, opt_preserveNewlines) {
    // Fast path: if the text cannot possibly contain an email ('@'), a URL with
    // a scheme ('://') or a scheme-less 'www.' URL, skip the expensive regex
    // walk entirely. The 'www.' probe is case-insensitive because the link
    // regex itself is compiled with the 'i' flag; a case-sensitive probe would
    // wrongly short-circuit inputs such as 'wWw.example.com'.
    if (!/@|:\/\/|www\./i.test(text)) {
      return opt_preserveNewlines ?
          goog.html.SafeHtml.htmlEscapePreservingNewlines(text) :
          goog.html.SafeHtml.htmlEscape(text);
    }

    /**
     * Prepares a run of non-link text for the output list. When newlines must
     * be preserved the text is converted right away; otherwise it is kept as a
     * plain string and handed to the final concat call as-is.
     * NOTE(review): presumably SafeHtml.concat escapes plain strings, which is
     * what the documented return contract relies on -- confirm against
     * goog.html.SafeHtml.
     * @param {string} plain Text that is not part of a link.
     * @return {!goog.html.SafeHtml|string}
     */
    var prepareText = function(plain) {
      return opt_preserveNewlines ?
          goog.html.SafeHtml.htmlEscapePreservingNewlines(plain) :
          plain;
    };

    var attributesMap = {};
    for (var key in opt_attributes) {
      // Our API allows '' to omit the attribute, SafeHtml requires null.
      attributesMap[key] = opt_attributes[key] || null;
    }
    // Set default options if they haven't been explicitly set.
    if (!('rel' in attributesMap)) {
      attributesMap['rel'] = 'nofollow';
    }
    if (!('target' in attributesMap)) {
      attributesMap['target'] = '_blank';
    }

    var output = [];
    // String.prototype.replace is used purely as a "for each match" iterator;
    // its return value is ignored and the pieces are collected in `output`.
    text.replace(
        goog.string.linkify.FIND_LINKS_RE_,
        function(part, before, original, email, protocol) {
          output.push(prepareText(before));
          if (!original) {
            // End-of-input alternative matched: only trailing text remained.
            return '';
          }
          var href = '';
          /** @type {string} */
          var linkText;
          /** @type {string} */
          var afterLink;
          if (email) {
            href = 'mailto:';
            linkText = email;
            afterLink = '';
          } else {
            // This is a full url link.
            if (!protocol) {
              href = 'http://';
            }
            var splitEndingPunctuation =
                original.match(goog.string.linkify.ENDS_WITH_PUNCTUATION_RE_);
            // An open paren in the link will often be matched with a close
            // paren at the end, so skip cutting off ending punctuation if
            // there's an open paren. For example:
            // http://en.wikipedia.org/wiki/Titanic_(1997_film)
            if (splitEndingPunctuation &&
                !goog.string.contains(original, '(')) {
              linkText = splitEndingPunctuation[1];
              afterLink = splitEndingPunctuation[2];
            } else {
              linkText = original;
              afterLink = '';
            }
          }
          // The same attribute map is reused for every link; only href changes.
          attributesMap['href'] = href + linkText;
          output.push(goog.html.SafeHtml.create('a', attributesMap, linkText));
          output.push(prepareText(afterLink));
          return '';
        });
    return goog.html.SafeHtml.concat(output);
  };
  /**
   * Finds the first URL that appears in the given text.
   * @param {string} text Plain text to search.
   * @return {string} The full text of the first URL match, or an empty string
   *     when the text contains no URL.
   */
  goog.string.linkify.findFirstUrl = function(text) {
    var urlMatch = text.match(goog.string.linkify.URL_RE_);
    if (urlMatch == null) {
      return '';
    }
    return urlMatch[0];
  };
  /**
   * Finds the first email address that appears in the given text.
   * @param {string} text Plain text to search.
   * @return {string} The full text of the first email match (element 0 of the
   *     match, so an optional leading "mailto:" is included when present), or
   *     an empty string when the text contains no email address.
   */
  goog.string.linkify.findFirstEmail = function(text) {
    var emailMatch = text.match(goog.string.linkify.EMAIL_RE_);
    if (emailMatch == null) {
      return '';
    }
    return emailMatch[0];
  };
  /**
   * Characters that, when they form a run at the very end of a url, are treated
   * as surrounding punctuation rather than as part of the url. Written so that
   * it can be dropped directly inside a regex character set ("[...]").
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.ENDING_PUNCTUATION_CHARS_ = ':;,\\.?}\\]\\)!';
  /**
   * Splits a string into (1) the shortest possible prefix and (2) the trailing
   * run of ending-punctuation characters. Only matches when the string actually
   * ends with at least one such character.
   * @type {!RegExp}
   * @const
   * @private
   */
  goog.string.linkify.ENDS_WITH_PUNCTUATION_RE_ = new RegExp([
    '^(.*?)([', goog.string.linkify.ENDING_PUNCTUATION_CHARS_, ']+)$'
  ].join(''));
  /**
   * Contents of a regex character set ("[...]") describing every character
   * allowed in a url from the hostname onwards. In order it contains:
   *   - \w, i.e. [a-zA-Z0-9_];
   *   - the range "#-;", i.e. the characters "#$%&'()*+,-./0123456789:;";
   *   - the individual characters "!=?@[\]_`{|}~".
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.ACCEPTABLE_URL_CHARS_ = '\\w#-;!=?@\\[\\\\\\]_`{|}~';
  /**
   * Regex patterns for the url schemes that linkify recognizes. Each entry is a
   * pattern, not a literal ('https?' covers both http and https). The mailto
   * scheme is not listed here because it is handled by the email matching.
   * @type {!Array<string>}
   * @const
   * @private
   */
  goog.string.linkify.RECOGNIZED_PROTOCOLS_ = [
    'https?',
    'ftp'
  ];
  /**
   * Regular expression pattern that matches the start of a url that carries an
   * explicit scheme, e.g. "http://". Contains one capturing group holding the
   * scheme itself.
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.PROTOCOL_START_ = [
    '(', goog.string.linkify.RECOGNIZED_PROTOCOLS_.join('|'), ')://'
  ].join('');
  /**
   * Regular expression pattern that matches the start of a typical http url
   * written without its "http://" scheme, i.e. one that begins with "www.".
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.WWW_START_ = 'www\\.';
  /**
   * Regular expression pattern that matches a url: either a recognized scheme
   * followed by "://" or a leading "www.", then one or more acceptable url
   * characters.
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.URL_RE_STRING_ = [
    '(?:',
    goog.string.linkify.PROTOCOL_START_,
    '|',
    goog.string.linkify.WWW_START_,
    ')[',
    goog.string.linkify.ACCEPTABLE_URL_CHARS_,
    ']+'
  ].join('');
  /**
   * Compiled, case-insensitive regular expression that matches a single url.
   * It is not global, so a match against it yields the first url only.
   * @type {!RegExp}
   * @const
   * @private
   */
  goog.string.linkify.URL_RE_ = new RegExp(
      goog.string.linkify.URL_RE_STRING_,
      'i');
  /**
   * Regular expression pattern that matches a top level domain followed by a
   * word boundary.
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.TOP_LEVEL_DOMAIN_ = '(?:' + [
    'com', 'org', 'net', 'edu', 'gov',
    // from http://www.iana.org/gtld/gtld.htm
    'aero', 'biz', 'cat', 'coop', 'info', 'int', 'jobs', 'mobi', 'museum',
    'name', 'pro', 'travel',
    'arpa', 'asia', 'xxx',
    // a two letter country code
    '[a-z][a-z]'
  ].join('|') + ')\\b';
  /**
   * Regular expression pattern that matches an email address, optionally
   * preceded by "mailto:". Contains one capturing group holding the address
   * without that prefix.
   *
   * The hyphen in the local-part character set is escaped on purpose. Written
   * unescaped as "+-/" it forms a range from "+" to "/", which also admits ","
   * and makes a separator comma (as in "a@x.com,b@y.com") part of the next
   * address.
   * @type {string}
   * @const
   * @private
   */
  goog.string.linkify.EMAIL_RE_STRING_ =
      '(?:mailto:)?([\\w.!#$%&\'*+\\-/=?^_`{|}~]+@[A-Za-z0-9.-]+\\.' +
      goog.string.linkify.TOP_LEVEL_DOMAIN_ + ')';
  /**
   * Compiled, case-insensitive regular expression that matches a single email
   * address. It is not global, so a match against it yields the first email
   * only.
   * @type {!RegExp}
   * @const
   * @private
   */
  goog.string.linkify.EMAIL_RE_ = new RegExp(
      goog.string.linkify.EMAIL_RE_STRING_,
      'i');
  /**
   * Regular expression used to walk over every link (url or email) in a string.
   * Global and case-insensitive. Its capturing groups are:
   *   1. the text before the link (may be the empty string);
   *   2. the original text that should be replaced by a link (empty when only
   *      the end of the input was matched);
   *   3. the email address, when the link is an email;
   *   4. the scheme of the url, when one was specified.
   * @type {!RegExp}
   * @const
   * @private
   */
  goog.string.linkify.FIND_LINKS_RE_ = new RegExp(
      [
        // Match everything including newlines.
        '([\\S\\s]*?)(',
        // Match email after a word break.
        '\\b', goog.string.linkify.EMAIL_RE_STRING_, '|',
        // Match url after a word break.
        '\\b', goog.string.linkify.URL_RE_STRING_, '|$)'
      ].join(''),
      'gi');
  247. 'gi');