123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282 |
- // Copyright 2008 The Closure Library Authors. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS-IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- /**
- * @fileoverview Utility function for linkifying text.
- * @author bolinfest@google.com (Michael Bolin)
- */
- goog.provide('goog.string.linkify');
- goog.require('goog.html.SafeHtml');
- goog.require('goog.string');
/**
 * Takes a string of plain text and linkifies URLs and email addresses. For a
 * URL (unless opt_attributes is specified), the target of the link will be
 * _blank and it will have a rel=nofollow attribute applied to it so that links
 * created by linkify will not be of interest to search engines.
 *
 * Every attribute in opt_attributes is copied onto each generated anchor; a
 * falsy value (for example '') is translated to null so that the attribute is
 * omitted. The 'href' attribute is always overwritten with the detected link.
 *
 * @param {string} text Plain text.
 * @param {!Object<string, ?goog.html.SafeHtml.AttributeValue>=} opt_attributes
 *     Attributes to add to all links created. Default are rel=nofollow and
 *     target=_blank. To clear those default attributes set rel='' and
 *     target=''.
 * @param {boolean=} opt_preserveNewlines Whether to preserve newlines with
 *     <br>.
 * @return {!goog.html.SafeHtml} Linkified HTML. Any text that is not part of a
 *     link will be HTML-escaped.
 */
goog.string.linkify.linkifyPlainTextAsHtml = function(
    text, opt_attributes, opt_preserveNewlines) {
  // This shortcut makes linkifyPlainText much faster if text doesn't contain
  // URLs or email addresses and adds an insignificant penalty if it does.
  // The "www." probe must be case-insensitive because FIND_LINKS_RE_ is
  // compiled with the "i" flag: checking only 'www.', 'Www.' and 'WWW.' would
  // let spellings such as "wWw.example.com" slip through un-linkified.
  if (text.indexOf('@') == -1 && text.indexOf('://') == -1 &&
      !/www\./i.test(text)) {
    return opt_preserveNewlines ?
        goog.html.SafeHtml.htmlEscapePreservingNewlines(text) :
        goog.html.SafeHtml.htmlEscape(text);
  }

  var attributesMap = {};
  for (var key in opt_attributes) {
    if (!opt_attributes[key]) {
      // Our API allows '' to omit the attribute, SafeHtml requires null.
      attributesMap[key] = null;
    } else {
      attributesMap[key] = opt_attributes[key];
    }
  }
  // Set default options if they haven't been explicitly set.
  if (!('rel' in attributesMap)) {
    attributesMap['rel'] = 'nofollow';
  }
  if (!('target' in attributesMap)) {
    attributesMap['target'] = '_blank';
  }

  /**
   * Prepares a run of non-link text for the output list. With
   * opt_preserveNewlines the text is converted eagerly; otherwise the raw
   * string is kept and left to goog.html.SafeHtml.concat.
   * NOTE(review): this relies on SafeHtml.concat HTML-escaping plain string
   * arguments -- confirm against the SafeHtml documentation.
   * @param {string} plain Text that is not part of a link.
   * @return {!goog.html.SafeHtml|string}
   */
  var toOutputPiece = function(plain) {
    return opt_preserveNewlines ?
        goog.html.SafeHtml.htmlEscapePreservingNewlines(plain) :
        plain;
  };

  var output = [];
  // String.prototype.replace is used purely as an iterator over all matches;
  // its return value is ignored.
  text.replace(
      goog.string.linkify.FIND_LINKS_RE_,
      function(part, before, original, email, protocol) {
        output.push(toOutputPiece(before));
        if (!original) {
          // The pattern matched the end of the input ("$" alternative), so
          // there is no link to emit after the leading text.
          return '';
        }
        var href = '';
        /** @type {string} */
        var linkText;
        /** @type {string} */
        var afterLink;
        if (email) {
          // Only the address itself is displayed; an optional "mailto:"
          // prefix in the source text is not part of the email group.
          href = 'mailto:';
          linkText = email;
          afterLink = '';
        } else {
          // This is a full url link.
          if (!protocol) {
            // Scheme-less "www." links default to http.
            href = 'http://';
          }
          var splitEndingPunctuation =
              original.match(goog.string.linkify.ENDS_WITH_PUNCTUATION_RE_);
          // An open paren in the link will often be matched with a close paren
          // at the end, so skip cutting off ending punctuation if there's an
          // open paren. For example:
          // http://en.wikipedia.org/wiki/Titanic_(1997_film)
          if (splitEndingPunctuation && !goog.string.contains(original, '(')) {
            linkText = splitEndingPunctuation[1];
            afterLink = splitEndingPunctuation[2];
          } else {
            linkText = original;
            afterLink = '';
          }
        }
        attributesMap['href'] = href + linkText;
        output.push(goog.html.SafeHtml.create('a', attributesMap, linkText));
        output.push(toOutputPiece(afterLink));
        return '';
      });
  return goog.html.SafeHtml.concat(output);
};
/**
 * Finds the first URL occurring in a piece of plain text, using the
 * case-insensitive goog.string.linkify.URL_RE_ pattern.
 * @param {string} text Plain text.
 * @return {string} The first URL, or an empty string if not found.
 */
goog.string.linkify.findFirstUrl = function(text) {
  var urlMatch = text.match(goog.string.linkify.URL_RE_);
  if (urlMatch == null) {
    return '';
  }
  // Index 0 holds the complete matched URL.
  return urlMatch[0];
};
/**
 * Finds the first email address occurring in a piece of plain text, using the
 * case-insensitive goog.string.linkify.EMAIL_RE_ pattern. The complete match
 * is returned, so an optional leading "mailto:" in the text is included.
 * @param {string} text Plain text.
 * @return {string} The first email address, or an empty string if not found.
 */
goog.string.linkify.findFirstEmail = function(text) {
  var emailMatch = text.match(goog.string.linkify.EMAIL_RE_);
  if (emailMatch == null) {
    return '';
  }
  // Index 0 holds the complete match (not just the address capture group).
  return emailMatch[0];
};
/**
 * Characters that, when they appear as a trailing run at the end of a url, are
 * treated as sentence punctuation rather than as part of the url. The string
 * is written so it can be dropped directly inside a regex character set
 * ("[...]"), hence the escaped ".", "]" and ")".
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.ENDING_PUNCTUATION_CHARS_ = ':;,\\.?}\\]\\)!';
/**
 * Splits a string that ends with one or more ending-punctuation characters.
 * Group 1 is the (lazily matched) text before the trailing punctuation and
 * group 2 is the trailing punctuation run itself.
 * @type {!RegExp}
 * @const
 * @private
 */
goog.string.linkify.ENDS_WITH_PUNCTUATION_RE_ = new RegExp(
    '^(.*?)([' +
    goog.string.linkify.ENDING_PUNCTUATION_CHARS_ +
    ']+)$');
/**
 * Contents of a regex character set ("[...]") describing every character that
 * may follow the start of a url (hostname and everything after it). In order,
 * it consists of:
 *   - \w, i.e. [a-zA-Z0-9_];
 *   - the range "#-;", i.e. the characters "#$%&'()*+,-./0123456789:;";
 *   - the individual characters "!=?@[\]_`{|}~".
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.ACCEPTABLE_URL_CHARS_ = '\\w#-;!=?@\\[\\\\\\]_`{|}~';
/**
 * Regex fragments for every url scheme that is recognized: http, https and
 * ftp. The mailto scheme is not listed here because it is handled by the email
 * pattern.
 * @type {!Array<string>}
 * @const
 * @private
 */
goog.string.linkify.RECOGNIZED_PROTOCOLS_ = [
  'https?',
  'ftp'
];
/**
 * Regular expression pattern that matches the start of a url that carries an
 * explicit scheme, e.g. "http://". Contains one capturing group that captures
 * the scheme (without the "://").
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.PROTOCOL_START_ = '(' +
    goog.string.linkify.RECOGNIZED_PROTOCOLS_.join('|') +
    ')://';
/**
 * Regular expression pattern that matches the start of a typical http url
 * written without its "http://" scheme, i.e. one beginning with "www.". The
 * regexes built from this pattern in this file use the "i" flag, so the match
 * is case-insensitive.
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.WWW_START_ = 'www\\.';
/**
 * Regular expression pattern that matches a url: either a recognized scheme
 * followed by "://", or a leading "www.", followed by one or more acceptable
 * url characters. The only capturing group is the scheme group contributed by
 * PROTOCOL_START_.
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.URL_RE_STRING_ = [
  '(?:',
  goog.string.linkify.PROTOCOL_START_,
  '|',
  goog.string.linkify.WWW_START_,
  ')[',
  goog.string.linkify.ACCEPTABLE_URL_CHARS_,
  ']+'
].join('');
/**
 * Compiled, case-insensitive regular expression that matches a url. It is not
 * global, so a match() call returns only the first occurrence.
 * @type {!RegExp}
 * @const
 * @private
 */
goog.string.linkify.URL_RE_ = new RegExp(
    goog.string.linkify.URL_RE_STRING_,
    'i');
/**
 * Regular expression pattern that matches a top level domain followed by a
 * word boundary. Accepts a fixed list of generic TLDs or any two-letter
 * (country code) TLD.
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.TOP_LEVEL_DOMAIN_ = [
  '(?:com|org|net|edu|gov',
  // from http://www.iana.org/gtld/gtld.htm
  '|aero|biz|cat|coop|info|int|jobs|mobi|museum|name|pro|travel',
  '|arpa|asia|xxx',
  // a two letter country code
  '|[a-z][a-z])\\b'
].join('');
/**
 * Regular expression pattern that matches an email.
 * Contains a capturing group to capture the email without the optional
 * "mailto:" prefix.
 *
 * The local part accepts word characters, "." and the punctuation
 * "!#$%&'*+-/=?^_`{|}~". The hyphen is escaped so that it is a literal "-":
 * written unescaped, "+-/" is a character range spanning "+", ",", "-", "."
 * and "/", which wrongly accepted a comma as part of an address (so in
 * "a@b.com,c@d.com" the second address was matched as ",c@d.com").
 * @type {string}
 * @const
 * @private
 */
goog.string.linkify.EMAIL_RE_STRING_ =
    '(?:mailto:)?([\\w.!#$%&\'*+\\-/=?^_`{|}~]+@[A-Za-z0-9.-]+\\.' +
    goog.string.linkify.TOP_LEVEL_DOMAIN_ + ')';
/**
 * Compiled, case-insensitive regular expression that matches an email. It is
 * not global, so a match() call returns only the first occurrence.
 * @type {!RegExp}
 * @const
 * @private
 */
goog.string.linkify.EMAIL_RE_ = new RegExp(
    goog.string.linkify.EMAIL_RE_STRING_,
    'i');
/**
 * Global, case-insensitive regular expression used to walk over all the links
 * (url or email) in a string. Its capturing groups are:
 *   1. the text before the link (possibly the empty string);
 *   2. the original text that should be replaced by a link (empty when the
 *      end of the input was matched instead of a link);
 *   3. the email address, when the link is an email;
 *   4. the scheme of the url, when one is specified.
 * @type {!RegExp}
 * @const
 * @private
 */
goog.string.linkify.FIND_LINKS_RE_ = new RegExp(
    [
      // Match everything including newlines.
      '([\\S\\s]*?)(',
      // Match email after a word break.
      '\\b', goog.string.linkify.EMAIL_RE_STRING_, '|',
      // Match url after a word break.
      '\\b', goog.string.linkify.URL_RE_STRING_, '|$)'
    ].join(''),
    'gi');
|