typogr.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /*!
  2. * typogr.js
  3. * Copyright(c) 2011 Eugene Kalinin
  4. * MIT Licensed
  5. */
  6. (function (root) {
  /**
   * Entry point of the library: wraps `obj` so that the typogr filters can
   * be invoked on it as methods. `Wrapper` is looked up when typogr is
   * called, not when it is defined.
   */
  var typogr = function (obj) {
    return new Wrapper(obj);
  };

  // Version string of the library.
  typogr.version = '0.6.8';

  // Publish typogr to the host environment. The first matching target wins:
  // a CommonJS `module.exports`, then an AMD `define`, and finally a
  // `typogr` property on the `root` object handed to the enclosing function.
  (function publish() {
    if (typeof module !== 'undefined' && module.exports) {
      module.exports = typogr;
      return;
    }
    if (typeof define === 'function' && define.amd) {
      define(function() { return typogr; });
      return;
    }
    root.typogr = typogr;
  }());
  20. // typogr functions
  21. // ---------------------
  // Compiles a pattern string and a flags string into a RegExp object.
  var re = function (regexp, flag) {
    var compiled = new RegExp(regexp, flag);
    return compiled;
  };

  // Matches an opening or closing tag whose name begins with one of the
  // listed names (case-insensitive). Capture 1 holds the "/" of a closing
  // tag (undefined for an opening tag); capture 2 holds the tag name.
  var re_skip_tags = /<(\/)?(style|pre|code|kbd|script|math|title)[^>]*>/i;
  /**
   * Wraps ampersands in HTML with ``<span class="amp">`` so they can be
   * styled with CSS. Ampersands are also normalized to ``&amp;``. Requires
   * ampersands to have whitespace or an ``&nbsp;`` on both sides.
   *
   * The input is scanned in chunks of "optional tag, text, optional closing
   * tag". A chunk is left untouched only when it starts with an OPENING
   * skip tag (see `re_skip_tags`); text that follows a closing skip tag
   * such as ``</pre>`` is processed normally.
   *
   * @param {string} text HTML or plain text.
   * @returns {string|undefined} Processed text; `undefined` when `text` is
   *   falsy and not a string.
   */
  var amp = typogr.amp = function(text) {
    //             ( $1       )( $2            )( $3       )
    var re_amp = /(\s|&nbsp;)(&|&amp;|&\#38;)(\s|&nbsp;)/g;
    //                   ( prefix  ) ( txt )( suffix     )
    var re_intra_tag = /(<[^<]*>)?([^<]*)(<\/[^<]*>)?/g;

    if( !text && typeof text !== "string" ) {
      return;
    }

    return text.replace(re_intra_tag, function (str, prefix, body, suffix) {
      var skip;
      prefix = prefix || '';
      suffix = suffix || '';

      // Capture 1 of re_skip_tags is the "/" of a closing tag, so it is
      // undefined exactly when the prefix opens a skipped element.
      skip = re_skip_tags.exec(prefix);
      if (skip && skip[1] === undefined) {
        return prefix + body + suffix;
      }

      return prefix +
             body.replace(re_amp, '$1<span class="amp">&amp;</span>$3') +
             suffix;
    });
  };
  /**
   * Wraps ordinal suffixes (digits followed by st, nd, rd or th) in
   * ``<span class="ord">`` so they can be styled with CSS.
   *
   * Text inside skipped elements (see `re_skip_tags`) is left untouched.
   * Open skipped elements are tracked on a stack, the same way
   * `smartypants` does it, so ordinary tags nested inside a skipped element
   * (e.g. ``<pre><b>1st</b></pre>``) do not end the skipped region.
   *
   * @param {string} text HTML or plain text.
   * @returns {string|undefined} Processed text; `undefined` when `text` is
   *   falsy and not a string.
   */
  var ord = typogr.ord = function(text) {
    if( !text && typeof text !== "string" ) {
      return;
    }

    //                ( $1 )( $2          )
    var re_suffix = /(\d+)(st|nd|rd|th)/g;
    var skip_stack = [];   // lower-cased names of the open skipped tags
    var result = [];

    tokenize(text).forEach( function (token) {
      var skip_match, tag_name;

      if (token.type === 'tag') {
        result.push(token.txt);
        skip_match = re_skip_tags.exec(token.txt);
        // Tags that are not skip tags never change the skip state.
        if (skip_match === null) {
          return;
        }
        tag_name = skip_match[2].toLowerCase();
        if (skip_match[1] === undefined) {
          // opening skip tag
          skip_stack.push(tag_name);
        } else if (skip_stack.length > 0 &&
                   skip_stack[skip_stack.length - 1] === tag_name) {
          // closing tag that matches the innermost open skip tag
          skip_stack.pop();
        }
        return;
      }

      if (skip_stack.length > 0) {
        result.push(token.txt);
      } else {
        result.push(token.txt.replace(re_suffix, '$1<span class="ord">$2</span>'));
      }
    });

    return result.join('');
  };
  /**
   * Wraps initial quotes in ``class="dquo"`` for double quotes or
   * ``class="quo"`` for single quotes. A quote counts as initial when it
   * follows an opening block tag ``(h1-h6, p, li, dt, dd)`` or the start of
   * the string, optionally separated from it by white space and opening
   * inline tags ``a, em, strong, span, b, i``.
   *
   * Every block element in the input is handled (the pattern is global),
   * and tag names are matched as whole words so that e.g. ``<pre>`` is not
   * mistaken for ``<p>``.
   *
   * @param {string} text HTML or plain text.
   * @returns {string|undefined} Processed text; `undefined` when `text` is
   *   falsy and not a string.
   */
  var initQuotes = typogr.initQuotes = function(text) {
    var re_quote = re(
      '(?:(?:<(?:p|h[1-6]|li|dt|dd)\\b[^>]*>|^)'+      // an opening p, h1-6, li, dd, dt
                                                       // or the start of the string
      '\\s*'+                                          // optional white space
      '(?:<(?:a|em|span|strong|i|b)\\b[^>]*>\\s*)*)'+  // optional opening inline tags,
                                                       // each with optional white space
      '(?:("|&ldquo;|&#8220;)|'+                       // group 1: a left double quote
      '(\'|&lsquo;|&#8216;))'                          // group 2: a left single quote
      , 'gi');

    if( !text && typeof text !== "string" ) {
      return;
    }

    return text.replace(re_quote, function (matched_str, dquo, squo) {
      var classname = dquo ? "dquo" : "quo";
      var quote = dquo ? dquo : squo;
      // Everything before the quote (tags and white space) is kept verbatim.
      var lead = matched_str.slice(0, matched_str.lastIndexOf(quote));

      return lead + '<span class="' + classname + '">' + quote + '</span>';
    });
  };
  /**
   * Replaces the white space before the last word of a text run with
   * ``<span class="widont">&nbsp;</span>`` so the last word cannot wrap onto
   * a line of its own. The last word must be followed by the end of the
   * string or a closing block tag ``(h1-h6, p, li, dd, dt)``, optionally
   * after closing inline tags ``a, em, strong, span, b, i``, and must be
   * preceded by at least two white-space-separated words.
   *
   * @param {string} text HTML or plain text.
   * @returns {string|undefined} Processed text; `undefined` when `text` is
   *   falsy and not a string.
   */
  var widont = typogr.widont = function(text) {
    var inline_tags = 'a|em|span|strong|i|b';
    // a word with possible bordering inline tags
    var word = '(?:<(?:'+inline_tags+')[^>]*?>)*?[^\\s<>]+(?:</(?:'+inline_tags+')[^>]*?>)*?';
    var re_widont = re(
      '('+                                     // matching group 1
        '\\s+'+word+                           // space and a word with a possible bordering tag
        '\\s+'+word+                           // space and a word with a possible bordering tag
      ')'+
      '(?:\\s+)'+                              // one or more space characters
      '('+                                     // matching group 2
        '[^<>\\s]+'+                           // nontag/nonspace characters
        '(?:\\s*</(?:'+inline_tags+')[^>]*?>\\s*\\.*)*?'+ // inline closing tags, can be
                                               // surrounded by spaces and followed by periods
        '(?:\\s*?</(?:p|h[1-6]|li|dt|dd)>|$)'+ // allowed closing tags or end of string
      ')', 'gi');

    // Same input guard as the other filters.
    if( !text && typeof text !== "string" ) {
      return;
    }

    return text.replace(re_widont, '$1<span class="widont">&nbsp;</span>$2');
  };
  /**
   * Wraps runs of capital letters in ``<span class="caps">`` so they can be
   * styled with CSS. Two forms are recognized: a word containing at least
   * two capitals (digits allowed, e.g. ``HE34TRFID``) and dotted
   * abbreviations (e.g. ``D.O.T.``).
   *
   * Text inside skipped elements (see `re_skip_tags`) is left untouched.
   * Open skipped elements are tracked on a stack, the same way
   * `smartypants` does it, so ordinary tags nested inside a skipped element
   * do not end the skipped region.
   *
   * @param {string} text HTML or plain text.
   * @returns {string|undefined} Processed text; `undefined` when `text` is
   *   falsy and not a string.
   */
  var caps = typogr.caps = function(text) {
    if( !text && typeof text !== "string" ) {
      return;
    }

    var re_cap = re(
      '('+
        '(\\b[A-Z\\d]*'+          // Group 2: Any amount of caps and digits
        '[A-Z]\\d*[A-Z]'+         // A cap string must at least include two caps
                                  // (but they can have digits between them)
        '[A-Z\\d\']*\\b)'+        // Any amount of caps and digits or dumb apostrophes
        '|(\\b[A-Z]+\\.\\s?'+     // OR: Group 3: Some caps, followed by a '.' and an optional space
        '(?:[A-Z]+\\.\\s?)+)'+    // Followed by the same thing at least once more
        '(?:\\s|\\b|$)'+
      ')', 'g'
    );
    var skip_stack = [];   // lower-cased names of the open skipped tags
    var result = [];

    // Builds the replacement for one re_cap match.
    var wrap_caps = function (matched_str, g1, g2, g3) {
      var body = g3;
      var tail = '';

      if ( g2 ) {
        return '<span class="caps">' + g2 + '</span>';
      }
      // A dotted abbreviation may have swallowed one trailing space;
      // keep that space outside of the span.
      if ( g3.slice(-1) === ' ' ) {
        body = g3.slice(0, -1);
        tail = ' ';
      }
      return '<span class="caps">' + body + '</span>' + tail;
    };

    tokenize(text).forEach( function (token) {
      var skip_match, tag_name;

      if (token.type === 'tag') {
        result.push(token.txt);
        skip_match = re_skip_tags.exec(token.txt);
        // Tags that are not skip tags never change the skip state.
        if (skip_match === null) {
          return;
        }
        tag_name = skip_match[2].toLowerCase();
        if (skip_match[1] === undefined) {
          // opening skip tag
          skip_stack.push(tag_name);
        } else if (skip_stack.length > 0 &&
                   skip_stack[skip_stack.length - 1] === tag_name) {
          // closing tag that matches the innermost open skip tag
          skip_stack.pop();
        }
        return;
      }

      if (skip_stack.length > 0) {
        result.push(token.txt);
      } else {
        result.push(token.txt.replace(re_cap, wrap_caps));
      }
    });

    return result.join('');
  };
  /**
   * Applies all filters to `src`, in this order: amp, widont, smartypants,
   * caps, initQuotes, ord.
   *
   * @param {string|Object} src Text to process. When `src` has truthy
   *   `jquery` and `html` properties, the result of `src.html()` is
   *   processed instead.
   * @returns {string|undefined} Processed text; `undefined` when `src` is
   *   `null` or `undefined`.
   */
  typogr.typogrify = function(src) {
    var text = src;

    // Nothing to process; this also avoids a property lookup on null/undefined.
    if (src === null || src === undefined) {
      return;
    }
    if (src.jquery && src.html) {
      text = src.html();
    }

    text = amp(text);
    text = widont(text);
    text = smartypants(text);
    text = caps(text);
    text = initQuotes(text);
    text = ord(text);

    return text;
  };
  209. // SmartyPants functions
  210. // ---------------------
  /**
   * Translates plain ASCII punctuation characters into
   * "smart" typographic punctuation HTML entities.
   *
   * The input is tokenized into tags and text. Tags are copied verbatim.
   * Text is run through smartEscapes, smartDashes, smartEllipses,
   * smartBackticks and the quote handling, unless it sits inside a skipped
   * element (see `re_skip_tags`), in which case it is copied verbatim too.
   * Self-closing skip tags are not recognized as such.
   *
   * @param {string} text HTML or plain text.
   * @returns {string|undefined} Processed text; `undefined` when `text` is
   *   falsy and not a string.
   */
  var smartypants = typogr.smartypants = function(text) {
    if( !text && typeof text !== "string" ) {
      return;
    }

    var tokens = tokenize(text);
    var result = [];
    var skipped_tag_stack = [];   // lower-cased names of the open skipped tags
    var in_pre = false;
    // This is a cheat, used to get some context for one-character
    // tokens that consist of just a quote char. What we do is remember
    // the last character of the previous text token, to use as context
    // to curl single-character quote tokens correctly.
    var prev_token_last_char = '';

    tokens.forEach( function (token) {
      var skip_match, skipped_tag, last_char, t;

      if (token.type === 'tag') {
        // Don't mess with quotes inside some tags.
        // This does not handle self <closing/> tags!
        result.push(token.txt);

        skip_match = re_skip_tags.exec(token.txt);
        if ( skip_match === null ) {
          return;
        }
        skipped_tag = skip_match[2].toLowerCase();

        if ( skip_match[1] ) {
          // closing tag: only pops when it matches the innermost open tag
          if ( skipped_tag_stack.length > 0 &&
               skipped_tag === skipped_tag_stack[skipped_tag_stack.length-1] ) {
            skipped_tag_stack.pop();
          }
          if (skipped_tag_stack.length === 0) {
            in_pre = false;
          }
        } else {
          // opening tag
          skipped_tag_stack.push(skipped_tag);
          in_pre = true;
        }
        return;
      }

      t = token.txt;
      // Remember last char of this token before processing
      last_char = t.slice(-1);

      if ( !in_pre ) {
        // Special case rock 'n' roll: use apostrophes. Done only outside
        // of skipped elements so that code samples stay verbatim.
        t = t.replace(/(rock )'n'( roll)/gi, '$1&#8217;n&#8217;$2');

        t = smartEscapes(t);
        t = smartDashes(t);
        t = smartEllipses(t);
        // backticks need to be processed before quotes
        t = smartBackticks(t);

        // quotes
        switch(t) {
          case "'": // Special case: single-character ' token
            if (/\S/.test(prev_token_last_char)) { t = '&#8217;'; }
            else                                 { t = '&#8216;'; }
            break;
          case '"': // Special case: single-character " token
            if (/\S/.test(prev_token_last_char)) { t = '&#8221;'; }
            else                                 { t = '&#8220;'; }
            break;
          default:  // Normal case
            t = smartQuotes(t);
        }
      }

      prev_token_last_char = last_char;
      result.push(t);
    });

    return result.join('');
  };
  /**
   * Returns an array of the tokens comprising the input string.
   * Each token is either a tag (``<`` up to the next ``>``) or a non-empty
   * run of text between tags. Each element of the array is an object with
   * properties 'type' and 'txt'; values for 'type': 'tag' or 'text';
   * 'txt' is the actual value.
   *
   * When the input ends with a single dot (not with ".."), that dot is
   * returned as a text token of its own.
   *
   * @param {string} text HTML or plain text.
   * @returns {Array<{type: string, txt: string}>} Tokens in document order;
   *   an empty array for an empty, `null` or `undefined` input.
   */
  var tokenize = typogr.tokenize = function(text) {
    var tokens = [];
    var re_tag = /([^<]*)(<[^>]*>)/gi;
    var lastIndex = 0;   // end of the last tag found
    var curr_token, pre_text, head;

    if (text === null || text === undefined) {
      return tokens;
    }

    while ( (curr_token = re_tag.exec(text)) !== null ) {
      pre_text = curr_token[1];
      if (pre_text) {
        tokens.push({ type: 'text', txt: pre_text });
      }
      tokens.push({ type: 'tag', txt: curr_token[2] });
      lastIndex = re_tag.lastIndex;
    }

    // re_tag.lastIndex is reset to 0 once exec() fails, so the saved
    // lastIndex has to be used to find out whether text follows the last tag.
    if (lastIndex < text.length) {
      // if last char is a dot and not part of '..'
      // then the dot becomes a token of its own
      if (text.slice(-1) === '.' && text.slice(-2) !== '..') {
        head = text.slice(lastIndex, text.length - 1);
        if (head) {
          tokens.push({ type: 'text', txt: head });
        }
        tokens.push({ type: 'text', txt: '.' });
      } else {
        tokens.push({ type: 'text', txt: text.slice(lastIndex) });
      }
    }

    return tokens;
  };
  /**
   * Returns the input string with the backslash escape sequences
   * ``\"  \'  \-  \.  \\  \` `` replaced by the decimal HTML entity of the
   * escaped character. This is useful if you want to force a "dumb"
   * quote or other character to appear.
   *
   * The text is processed in a single left-to-right pass, so an escaped
   * backslash is consumed as a unit: ``\\"`` yields ``&#92;"`` and not a
   * backslash followed by an escaped quote.
   *
   * @param {string} text Text to process.
   * @returns {string} Text with the escape sequences replaced.
   */
  var smartEscapes = typogr.smartEscapes = function(text) {
    var entities = {
      '"':  '&#34;',
      '\'': '&#39;',
      '-':  '&#45;',
      '.':  '&#46;',
      '\\': '&#92;',
      '`':  '&#96;'
    };

    return text.replace(/\\(["'\-.\\`])/g, function (match, escaped) {
      return entities[escaped];
    });
  };
  /**
   * Returns the input text with each "---" translated to an em-dash HTML
   * entity (``&#8212;``) and each remaining "--" translated to an en-dash
   * HTML entity (``&#8211;``). The dashes of HTML comment delimiters
   * (``<!--`` and ``-->``) are left alone.
   *
   * @param {string} text Text to process.
   * @returns {string} Text with the dashes replaced.
   */
  var smartDashes = typogr.smartDashes = function(text) {
    return text
      .replace(/---/g, '&#8212;')
      // "--" is converted unless it directly follows "<!" or directly
      // precedes ">". The group re-inserts the character(s) consumed to
      // inspect what comes before the dashes.
      .replace(/(^|[^!]|[^<]!|^!)--(?!>)/g, '$1&#8211;');
  };
  /**
   * Returns the input string with every "..." and then every ". . ."
   * replaced by the ellipsis HTML entity ``&#8230;``.
   */
  var smartEllipses = typogr.smartEllipses = function(text) {
    // First the tight form, then the spaced form on the result.
    var tight = text.replace(/\.\.\./g, '&#8230;');
    var spaced = tight.replace(/\. \. \./g, '&#8230;');

    return spaced;
  };
  /**
   * Returns the input string with ``backticks'' -style double quotes
   * replaced by curly quote HTML entities: two backticks become
   * ``&#8220;`` and two apostrophes become ``&#8221;``.
   */
  var smartBackticks = typogr.smartBackticks = function(text) {
    var opened = text.replace(/``/g, '&#8220;');
    var closed = opened.replace(/''/g, '&#8221;');

    return closed;
  };
  /**
   * Returns the input string with straight quotes replaced by "educated"
   * curly quote HTML entities (``&#8216;`` ``&#8217;`` ``&#8220;``
   * ``&#8221;``).
   *
   * The replacements are applied in a fixed order: special cases first,
   * then opening single quotes, closing single quotes, opening double
   * quotes and closing double quotes. Single quotes still left at the end
   * become closing quotes, double quotes still left become opening quotes.
   *
   * @param {string} text Text to process (no tags expected).
   * @returns {string} Text with the quotes replaced.
   */
  var smartQuotes = typogr.smartQuotes = function(text) {
    // ASCII punctuation. "-" and "]" are escaped so that they are members
    // of the character class instead of forming a range or closing it.
    var punct_cls = '[!"#$%\'()*+,\\-.\\/:;<=>?@\\[\\\\\\]^_`{|}~]';
    // a punctuation character at a non-word-break
    var re_punct_str = '(?=' + punct_cls + '\\B)';
    // a character after which a quote is a closing quote
    var close_cls = '[^\\ \\t\\r\\n\\[\\{\\(\\-]';
    var dec_dashes = '&#8211;|&#8212;';
    // what may directly precede an opening quote
    var opening_context =
      '('+
        '\\s|'+              // a whitespace char, or
        '&nbsp;|'+           // a non-breaking space entity, or
        '--|'+               // dashes, or
        '&[mn]dash;|'+       // named dash entities
        dec_dashes + '|'+    // or decimal entities
        '&#x201[34];'+       // or hex
      ')';
    var re_opening_single_quotes = re(
      opening_context +
      '\''+                  // the quote
      '(?=\\w)', 'g');       // followed by a word character
    var re_opening_double_quotes = re(
      opening_context +
      '"'+                   // the quote
      '(?=\\w)', 'g');       // followed by a word character
    // quote after a closing-context character, NOT followed by
    // white space, a word-final "s" or a digit
    var re_closing_single_quotes = re(
      '('+close_cls+')'+
      '\''+
      '(?!\\s|s\\b|\\d)', 'g');
    // quote after a closing-context character, followed by
    // white space or a word-final "s"
    var re_closing_single_quotes2 = re(
      '('+close_cls+')'+
      '\''+
      '(?=\\s|s\\b)', 'g');
    var re_closing_double_quotes = re('"(?=\\s)', 'g');
    var re_closing_double_quotes2 = re('('+close_cls+')"', 'g');

    return text
      // Special case if the very first character is a quote
      // followed by punctuation at a non-word-break.
      // Close the quotes by brute force:
      .replace(re('^\'' + re_punct_str, 'g'), '&#8217;')
      .replace(re('^"' + re_punct_str, 'g'), '&#8221;')
      // Special case for double sets of quotes, e.g.:
      //  <p>He said, "'Quoted' words in a larger quote."</p>
      .replace(/"'(?=\w)/g, '&#8220;&#8216;')
      .replace(/'"(?=\w)/g, '&#8216;&#8220;')
      // Special case for decade abbreviations (the '80s):
      .replace(/\b'(?=\d{2}s)/g, '&#8217;')
      // Opening single quotes
      .replace(re_opening_single_quotes, '$1&#8216;')
      // Closing single quotes
      .replace(re_closing_single_quotes, '$1&#8217;')
      .replace(re_closing_single_quotes2, '$1&#8217;')
      // Any remaining single quotes should be closing ones
      .replace(/'/g, '&#8217;')
      // Opening double quotes
      .replace(re_opening_double_quotes, '$1&#8220;')
      // Closing double quotes
      .replace(re_closing_double_quotes, '&#8221;')
      .replace(re_closing_double_quotes2, '$1&#8221;')
      // Any remaining quotes should be opening ones.
      .replace(/"/g, '&#8220;');
  };
  432. // OOP internals
  433. // PS: Underscore rulez
  // Calling typogr as a function yields one of these wrapper objects, which
  // holds the wrapped value and exposes the typogr functions as methods.
  var Wrapper = function(obj) {
    this._wrapped = obj;
  };

  // Returns `obj` itself, or - when `chain` is truthy - a new chained
  // wrapper around it so that further methods can be called on the result.
  var result = function(obj, chain) {
    if (chain) {
      return typogr(obj).chain();
    }
    return obj;
  };

  // Installs `func` on the wrapper prototype under `name`. The installed
  // method calls `func` with typogr as `this` and the wrapped value as its
  // only argument, then hands the outcome to `result`.
  var addToWrapper = function(name, func) {
    Wrapper.prototype[name] = function() {
      var output = func.call(typogr, this._wrapped);
      return result(output, this._chain);
    };
  };
  // Duck-typed function check: true when `obj` is truthy and has truthy
  // `constructor`, `call` and `apply` properties.
  var isFunction = function(obj) {
    if (!obj) {
      return false;
    }
    return Boolean(obj.constructor && obj.call && obj.apply);
  };

  // Mirror every own, enumerable function property of typogr onto the
  // wrapper object.
  Object.keys(typogr).forEach(function (key) {
    var member = typogr[key];
    if ( isFunction(member) ) {
      addToWrapper(key, member);
    }
  });
  // Switches the wrapper into chaining mode and returns the wrapper itself.
  Wrapper.prototype.chain = function() {
    var self = this;
    self._chain = true;
    return self;
  };

  // Returns the value held by the wrapper.
  Wrapper.prototype.value = function() {
    var wrapped = this._wrapped;
    return wrapped;
  };
  467. }(this));