123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529 |
- /*!
- * typogr.js
- * Copyright(c) 2011 Eugene Kalinin
- * MIT Licensed
- */
- (function (root) {
/**
 * Main typogr function.
 *
 * Calling `typogr(obj)` wraps `obj` in a `Wrapper` instance so that the
 * filters can be used OO-style.
 *
 * @param {*} obj value to wrap
 * @returns {Wrapper} a new wrapper holding `obj`
 */
var typogr = function (obj) {
  return new Wrapper(obj);
};

// Current version
typogr.version = '0.6.8';

// Export the typogr object. The checks run in this order:
//   1. CommonJS (`module.exports` is available) -> `require()` API;
//   2. AMD (`define.amd` is available)          -> anonymous module;
//   3. otherwise                                -> `typogr` property on `root`.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = typogr;
} else if (typeof define === 'function' && define.amd) {
  define(function () {
    return typogr;
  });
} else {
  root.typogr = typogr;
}
// typogr functions
// ---------------------

/**
 * Shorthand for building a RegExp object from a pattern string.
 *
 * @param {string} regexp pattern source
 * @param {string} [flag] RegExp flags (e.g. 'g', 'gi')
 * @returns {RegExp} the compiled regular expression
 */
var re = function (regexp, flag) {
  return new RegExp(regexp, flag);
};
// RegExp matching the opening or closing form of the tags whose content
// must be skipped. Group 1 is '/' for a closing tag (undefined otherwise),
// group 2 is the tag name. Matching is case-insensitive.
var re_skip_tags = /<(\/)?(style|pre|code|kbd|script|math|title)[^>]*>/i;
/**
 * Wraps ampersands in HTML with ``<span class="amp">`` so they can be
 * styled with CSS. Ampersands are also normalized to ``&amp;``. Requires
 * ampersands to have whitespace or an ``&nbsp;`` on both sides.
 *
 * Text that directly follows one of the tags matched by `re_skip_tags`
 * is left untouched.
 *
 * @param {string} text HTML source
 * @returns {string|undefined} processed HTML; `undefined` when `text` is a
 *          falsy non-string value (undefined, null, 0, ...)
 */
var amp = typogr.amp = function (text) {
  //            (    $1   )(      $2      )(    $3   )
  var re_amp = /(\s|&nbsp;)(&|&amp;|&\#38;)(\s|&nbsp;)/g;
  //                  ( prefix )( txt  )(  suffix  )
  var re_intra_tag = /(<[^<]*>)?([^<]*)(<\/[^<]*>)?/g;

  if (!text && typeof text !== 'string') {
    return;
  }

  return text.replace(re_intra_tag, function (str, prefix, body, suffix) {
    // The optional groups are undefined when they did not participate.
    var opening = prefix || '';
    var closing = suffix || '';

    // Leave the content of skipped tags exactly as it is.
    if (opening.match(re_skip_tags)) {
      return opening + body + closing;
    }

    // Whatever spelling of the ampersand was found, emit the entity form.
    return opening +
           body.replace(re_amp, '$1<span class="amp">&amp;</span>$3') +
           closing;
  });
};
/**
 * Wraps date suffix in <span class="ord"> so they can be styled with CSS.
 *
 * A "date suffix" is `st`, `nd`, `rd` or `th` directly following one or
 * more digits. Text inside the tags matched by `re_skip_tags` is not
 * modified.
 *
 * @param {string} text HTML source
 * @returns {string|undefined} processed HTML; `undefined` when `text` is a
 *          falsy non-string value
 */
var ord = typogr.ord = function (text) {
  if (!text && typeof text !== 'string') {
    return;
  }

  //               $1     $2
  var re_suffix = /(\d+)(st|nd|rd|th)/g;
  // Names of the skipped tags that are currently open. Tracking them on a
  // stack (instead of a flag that is recomputed on every tag) keeps the
  // skipped state across ordinary tags nested inside a skipped one,
  // e.g. <code><span>1st</span></code>.
  var skipped_tag_stack = [];
  var result = [];

  tokenize(text).forEach(function (token) {
    var skip_match, tag_name;

    if (token.type === 'tag') {
      result.push(token.txt);
      skip_match = re_skip_tags.exec(token.txt);
      if (skip_match !== null) {
        tag_name = skip_match[2].toLowerCase();
        if (skip_match[1]) {
          // closing tag: only pops when it closes the innermost skipped tag
          if (skipped_tag_stack.length > 0 &&
              tag_name === skipped_tag_stack[skipped_tag_stack.length - 1]) {
            skipped_tag_stack.pop();
          }
        } else {
          // opening tag
          skipped_tag_stack.push(tag_name);
        }
      }
    } else if (skipped_tag_stack.length > 0) {
      result.push(token.txt);
    } else {
      result.push(token.txt.replace(re_suffix, '$1<span class="ord">$2</span>'));
    }
  });

  return result.join('');
};
/**
 * Wraps initial quotes in ``class="dquo"`` for double quotes or ``class="quo"``
 * for single quotes. Works in these block tags ``(h1-h6, p, li, dt, dd)``
 * and also accounts for potential opening inline elements ``a, em, strong, span, b, i``
 *
 * Only the first match is wrapped (the pattern is not global). Besides the
 * plain ``"`` and ``'`` characters, the left-quote entities ``&ldquo;`` /
 * ``&#8220;`` and ``&lsquo;`` / ``&#8216;`` are recognized.
 *
 * @param {string} text HTML source
 * @returns {string|undefined} processed HTML; `undefined` when `text` is a
 *          falsy non-string value
 */
var initQuotes = typogr.initQuotes = function (text) {
  if (!text && typeof text !== 'string') {
    return;
  }

  var re_quote = re(
      '(?:(?:<(?:p|h[1-6]|li|dt|dd)[^>]*>|^)' +     // start with an opening
                                                    // p, h1-6, li, dd, dt
                                                    // or the start of the string
      '\\s*' +                                      // optional white space!
      '(?:<(?:a|em|span|strong|i|b)[^>]*>\\s*)*)' + // optional opening inline tags,
                                                    // with more optional white space for each.
      '(?:("|&ldquo;|&#8220;)|' +                   // Find me a quote! /only need to find
      '(\'|&lsquo;|&#8216;))'                       // the left quotes and the primes/
      , 'i');

  return text.replace(re_quote, function (matched_str, dquo, squo) {
    var quote = dquo || squo;
    var classname = dquo ? 'dquo' : 'quo';
    // Everything matched before the quote (tags, white space) is kept as is.
    var before_quote = matched_str.slice(0, matched_str.lastIndexOf(quote));

    return before_quote + '<span class="' + classname + '">' + quote + '</span>';
  });
};
/**
 * Replaces the space between the last two words in a string with ``&nbsp;``
 * (wrapped in ``<span class="widont">``).
 * Works in these block tags ``(h1-h6, p, li, dd, dt)`` and also accounts for
 * potential closing inline elements ``a, em, strong, span, b, i``
 *
 * The pattern needs two words in front of the last one, so shorter runs of
 * text are returned unchanged.
 *
 * @param {string} text HTML source
 * @returns {string|undefined} processed HTML; `undefined` when `text` is a
 *          falsy non-string value
 */
var widont = typogr.widont = function (text) {
  // Same guard as the sibling filters (amp, ord, initQuotes).
  if (!text && typeof text !== 'string') {
    return;
  }

  var inline_tags = 'a|em|span|strong|i|b';
  // a word with possible bordering inline tags
  var word = '(?:<(?:' + inline_tags + ')[^>]*?>)*?[^\\s<>]+(?:</(?:' + inline_tags + ')[^>]*?>)*?';
  var re_widont = re(
      '(' +                                     // matching group 1
        '\\s+' + word +                         // space and a word with a possible bordering tag
        '\\s+' + word +                         // space and a word with a possible bordering tag
      ')' +
      '(?:\\s+)' +                              // one or more space characters
      '(' +                                     // matching group 2
        '[^<>\\s]+' +                           // nontag/nonspace characters
        '(?:\\s*</(?:a|em|span|strong|i|b)[^>]*?>\\s*\\.*)*?' + // one or more inline closing tags
                                                // can be surrounded by spaces
                                                // and followed by a period.
        '(?:\\s*?</(?:p|h[1-6]|li|dt|dd)>|$)' + // allowed closing tags or end of line
      ')', 'gi');

  return text.replace(re_widont, '$1<span class="widont">&nbsp;</span>$2');
};
/**
 * Wraps multiple capital letters in ``<span class="caps">``
 * so they can be styled with CSS.
 *
 * Two kinds of strings are wrapped: runs containing at least two capital
 * letters (digits allowed, e.g. ``2KU2``) and dotted capitals such as
 * ``D.O.T.``. Text inside the tags matched by `re_skip_tags` is not
 * modified.
 *
 * @param {string} text HTML source
 * @returns {string|undefined} processed HTML; `undefined` when `text` is a
 *          falsy non-string value
 */
var caps = typogr.caps = function (text) {
  // Same guard as the sibling filters (amp, ord, initQuotes).
  if (!text && typeof text !== 'string') {
    return;
  }

  var re_cap = re(
      '(' +
        '(\\b[A-Z\\d]*' +          // Group 2: Any amount of caps and digits
        '[A-Z]\\d*[A-Z]' +         // A cap string must at least include two caps
                                   // (but they can have digits between them)
        '[A-Z\\d\']*\\b)' +        // Any amount of caps and digits or dumb apostrophes
        '|(\\b[A-Z]+\\.\\s?' +     // OR: Group 3: Some caps, followed by a '.' and an optional space
        '(?:[A-Z]+\\.\\s?)+)' +    // Followed by the same thing at least once more
        '(?:\\s|\\b|$)' +
      ')', 'g'
  );
  // Names of the skipped tags that are currently open. A stack keeps the
  // skipped state across ordinary tags nested inside a skipped one,
  // e.g. <code><b>ABC</b></code>.
  var skipped_tag_stack = [];
  var result = [];

  // Builds the replacement for one match of `re_cap`.
  var wrap = function (matched_str, g1, g2, g3) {
    var dotted, tail;

    if (g2) {
      return '<span class="caps">' + g2 + '</span>';
    }
    // This is necessary to keep dotted cap strings from picking up
    // extra spaces: a trailing space stays outside of the span.
    if (g3.slice(-1) === ' ') {
      dotted = g3.slice(0, -1);
      tail = ' ';
    } else {
      dotted = g3;
      tail = '';
    }
    return '<span class="caps">' + dotted + '</span>' + tail;
  };

  tokenize(text).forEach(function (token) {
    var skip_match, tag_name;

    if (token.type === 'tag') {
      result.push(token.txt);
      skip_match = re_skip_tags.exec(token.txt);
      if (skip_match !== null) {
        tag_name = skip_match[2].toLowerCase();
        if (skip_match[1]) {
          // closing tag: only pops when it closes the innermost skipped tag
          if (skipped_tag_stack.length > 0 &&
              tag_name === skipped_tag_stack[skipped_tag_stack.length - 1]) {
            skipped_tag_stack.pop();
          }
        } else {
          // opening tag
          skipped_tag_stack.push(tag_name);
        }
      }
    } else if (skipped_tag_stack.length > 0) {
      result.push(token.txt);
    } else {
      result.push(token.txt.replace(re_cap, wrap));
    }
  });

  return result.join('');
};
/**
 * Applies the following filters, in this order: amp, widont, smartypants,
 * caps, initQuotes, ord.
 *
 * @param {string|Object} src HTML source, or an object exposing `jquery`
 *        and `html` properties, in which case `src.html()` is processed
 * @returns {string|undefined} processed HTML; `undefined` when there is
 *          nothing to process (falsy non-string input)
 */
typogr.typogrify = function (src) {
  var text = src;

  // `src &&` keeps null/undefined from throwing on the property lookup.
  if (src && src.jquery && src.html) {
    text = src.html();
  }
  // Same guard as the individual filters, applied once up front so the
  // whole pipeline yields `undefined` instead of failing half-way.
  if (!text && typeof text !== 'string') {
    return;
  }

  text = amp(text);
  text = widont(text);
  text = smartypants(text);
  text = caps(text);
  text = initQuotes(text);
  text = ord(text);
  return text;
};
// SmartyPants functions
// ---------------------

/**
 * Translates plain ASCII punctuation characters into
 * "smart" typographic punctuation HTML entities.
 *
 * Text inside the tags matched by `re_skip_tags` is passed through
 * untouched. Everything else goes through smartEscapes, smartDashes,
 * smartEllipses, smartBackticks and the quote handling, in that order.
 *
 * @param {string} text HTML source
 * @returns {string|undefined} processed HTML; `undefined` when `text` is a
 *          falsy non-string value
 */
var smartypants = typogr.smartypants = function (text) {
  // Same guard as the sibling filters (amp, ord, initQuotes).
  if (!text && typeof text !== 'string') {
    return;
  }

  var tokens = tokenize(text)
    , result = []
    , skipped_tag_stack = []
    , in_pre = false
    // This is a cheat, used to get some context for one-character
    // tokens that consist of just a quote char. What we do is remember
    // the last character of the previous text token, to use as context
    // to curl single-character quote tokens correctly.
    , prev_token_last_char = '';

  tokens.forEach(function (token) {
    var skip_match, skipped_tag, last_char, t;

    if (token.type === 'tag') {
      // Don't mess with quotes inside some tags.
      // This does not handle self <closing/> tags!
      result.push(token.txt);

      // is it a skipped tag ?
      skip_match = re_skip_tags.exec(token.txt);
      if (skip_match !== null) {
        skipped_tag = skip_match[2].toLowerCase();

        if (skip_match[1]) {
          // closing tag: only pops when it closes the innermost skipped tag
          if (skipped_tag_stack.length > 0 &&
              skipped_tag === skipped_tag_stack[skipped_tag_stack.length - 1]) {
            skipped_tag_stack.pop();
          }
          if (skipped_tag_stack.length === 0) {
            in_pre = false;
          }
        } else {
          // opening tag
          skipped_tag_stack.push(skipped_tag);
          in_pre = true;
        }
      }
      return;
    }

    t = token.txt;
    // Remember last char of this token before processing
    last_char = t.slice(-1);

    if (!in_pre) {
      // Special case rock 'n' roll: use apostrophes. Done here, together
      // with the other rewrites, so skipped tags keep their text verbatim.
      t = t.replace(/(rock )'n'( roll)/gi, '$1&#8217;n&#8217;$2');

      t = smartEscapes(t);
      t = smartDashes(t);
      t = smartEllipses(t);
      // backticks need to be processed before quotes
      t = smartBackticks(t);

      // quotes
      switch (t) {
        case "'": // Special case: single-character ' token
          // closing when the previous text ended with a non-space character
          t = /\S/.test(prev_token_last_char) ? '&#8217;' : '&#8216;';
          break;
        case '"': // Special case: single-character " token
          t = /\S/.test(prev_token_last_char) ? '&#8221;' : '&#8220;';
          break;
        default: // Normal case
          t = smartQuotes(t);
      }
    }

    prev_token_last_char = last_char;
    result.push(t);
  });

  return result.join('');
};
/**
 * Returns an array of the tokens comprising the input string.
 * Each token is either a tag or a run of text between tags.
 * Each element of the array is an object with properties 'type' and 'txt';
 * Values for 'type': 'tag' or 'text'; 'txt' is the actual value.
 *
 * A tag is everything from a '<' up to the next '>'. The text after the
 * last tag is always emitted as a final 'text' token (possibly with an
 * empty 'txt'); when the input ends with a single '.', that dot is emitted
 * as a text token of its own.
 *
 * @param {string} text HTML source
 * @returns {Array<{type: string, txt: string}>} the tokens, in input order
 */
var tokenize = typogr.tokenize = function (text) {
  var tokens = []
    , lastIndex = 0
    , re_tag = /([^<]*)(<[^>]*>)/gi
    , curr_token
    , pre_text
    , tag_text;

  while ((curr_token = re_tag.exec(text)) !== null) {
    pre_text = curr_token[1];
    tag_text = curr_token[2];

    if (pre_text) {
      tokens.push({ type: 'text', txt: pre_text });
    }
    tokens.push({ type: 'tag', txt: tag_text });
    lastIndex = re_tag.lastIndex;
  }

  // `re_tag.lastIndex` is reset to 0 by the failed exec() that ends the
  // loop, so the offset saved in `lastIndex` is the one to look at here.
  if (lastIndex <= text.length) {
    // if last char is a dot and not a '...'
    // then push two tokens
    if (text.slice(-1) === '.' && text.slice(-2) !== '..') {
      tokens.push({ type: 'text', txt: text.slice(lastIndex, text.length - 1) });
      tokens.push({ type: 'text', txt: text.slice(-1) });
    } else {
      tokens.push({ type: 'text', txt: text.slice(lastIndex) });
    }
  }

  return tokens;
};
/**
 * Returns input string, after processing the following backslash
 * escape sequences. This is useful if you want to force a "dumb"
 * quote or other character to appear.
 *
 *   \"  ->  &#34;      \'  ->  &#39;      \-  ->  &#45;
 *   \.  ->  &#46;      \\  ->  &#92;      \`  ->  &#96;
 *
 * The characters are emitted as numeric entities so that later passes
 * (dashes, ellipses, quotes) no longer see them as plain punctuation.
 *
 * @param {string} text text to process
 * @returns {string} text with the escape sequences replaced
 */
var smartEscapes = typogr.smartEscapes = function (text) {
  return text.replace(/\\"/g,  '&#34;')
             .replace(/\\'/g,  '&#39;')
             .replace(/\\-/g,  '&#45;')
             .replace(/\\\./g, '&#46;')
             .replace(/\\\\/g, '&#92;')
             .replace(/\\`/g,  '&#96;');
};
/**
 * Returns input text, with each instance of "---" translated to an
 * em-dash HTML entity (&#8212;) and each instance of "--" translated to
 * an en-dash HTML entity (&#8211;).
 *
 * A "--" that is directly followed by '>' is left alone.
 *
 * @param {string} text text to process
 * @returns {string} text with dashes replaced by entities
 */
var smartDashes = typogr.smartDashes = function (text) {
  return text.replace(/---/g, '&#8212;')                            // em  (yes, backwards)
             .replace(/([^<][^!]|[^!]|^)--(?!>)/g, '$1&#8211;');    // en  (yes, backwards)
};
/**
 * Returns input string, with each instance of "..." (or the spaced form
 * ". . .") translated to an ellipsis HTML entity (&#8230;).
 *
 * @param {string} text text to process
 * @returns {string} text with ellipses replaced by the entity
 */
var smartEllipses = typogr.smartEllipses = function (text) {
  return text.replace(/\.\.\./g,   '&#8230;')
             .replace(/\. \. \./g, '&#8230;');
};
/**
 * Returns input string, with ``backticks'' -style double quotes
 * translated into HTML curly quote entities: two backticks become
 * &#8220; and two straight single quotes become &#8221;.
 *
 * @param {string} text text to process
 * @returns {string} text with backtick-style quotes replaced by entities
 */
var smartBackticks = typogr.smartBackticks = function (text) {
  return text.replace(/``/g, '&#8220;')
             .replace(/''/g, '&#8221;');
};
/**
 * Returns input string, with "educated" curly quote
 * HTML entities.
 *
 * The rules are applied in a fixed order; each one only sees the straight
 * quotes that the previous rules left behind.
 *
 * @param {string} text text to process
 * @returns {string} text with straight quotes replaced by the entities
 *          &#8216; &#8217; (single) and &#8220; &#8221; (double)
 */
var smartQuotes = typogr.smartQuotes = function (text) {
  // ASCII punctuation. The closing bracket is escaped (`\]`) so that the
  // whole list stays inside one character class.
  var punct_cls     = '[!"#\\$\\%\\\'()*+,-.\\/:;<=>?\\@\\[\\\\\\]\\^_`{|}~]'
    // built by concatenation: the class contains '$', which would be risky
    // inside a String.prototype.replace replacement string
    , re_punct_str  = '(?=' + punct_cls + '\\B)'
    , close_cls     = '[^\\ \\t\\r\\n\\[\\{\\(\\-]'
    , dec_dashes    = '&#8211;|&#8212;'
    // what may stand directly in front of an opening quote
    , open_context  =
        '(' +
          '\\s|' +              // a whitespace char, or
          '&nbsp;|' +           // a non-breaking space entity, or
          '--|' +               // dashes, or
          '&[mn]dash;|' +       // named dash entities
          dec_dashes + '|' +    // or decimal entities
          '&#x201[34];' +       // or hex
        ')'
    , re_opening_single_quotes = re(
        open_context +
        '\'' +                  // the quote
        '(?=\\w)', 'g')         // followed by a word character
    // NOTE: the blanks inside the two lookaheads below are literal spaces
    // for the JavaScript RegExp engine; the patterns are kept as they are.
    , re_closing_single_quotes = re(
        '(' + close_cls + ')' +
        '\'' +
        '(?!\\s | s\\b | \\d)', 'g')
    , re_closing_single_quotes2 = re(
        '(' + close_cls + ')' +
        '\'' +
        '(?!\\s | s\\b)', 'g')
    , re_opening_double_quotes = re(
        open_context +
        '"' +                   // the quote
        '(?=\\w)', 'g')         // followed by a word character
    , re_closing_double_quotes  = re('"(?=\\s)', 'g')
    , re_closing_double_quotes2 = re('(' + close_cls + ')"', 'g');

  return text
    // Special case if the very first character is a quote
    // followed by punctuation at a non-word-break.
    // Close the quotes by brute force:
    .replace(re('^\'' + re_punct_str, 'g'), '&#8217;')
    .replace(re('^"' + re_punct_str, 'g'), '&#8221;')
    // Special case for double sets of quotes, e.g.:
    //  <p>He said, "'Quoted' words in a larger quote."</p>
    .replace(/"'(?=\w)/g, '&#8220;&#8216;')
    .replace(/'"(?=\w)/g, '&#8216;&#8220;')
    // Special case for decade abbreviations (the '80s):
    .replace(/\b'(?=\d{2}s)/g, '&#8217;')
    // Opening single quotes
    .replace(re_opening_single_quotes, '$1&#8216;')
    // Closing single quotes
    .replace(re_closing_single_quotes, '$1&#8217;')
    // (the pattern has a single capture group, so only $1 is referenced)
    .replace(re_closing_single_quotes2, '$1&#8217;')
    // Any remaining single quotes should be closing ones
    .replace(/'/g, '&#8217;')
    // Opening double quotes
    .replace(re_opening_double_quotes, '$1&#8220;')
    // Closing double quotes
    .replace(re_closing_double_quotes, '&#8221;')
    .replace(re_closing_double_quotes2, '$1&#8221;')
    // Any remaining quotes should be opening ones.
    .replace(/"/g, '&#8220;');
};
// OOP internals
// PS: Underscore rulez

// If typogr is called as a function, it returns a wrapped object that
// can be used OO-style. Wrapped objects may be chained.
/**
 * Holds a value so the typogr functions can be called on it as methods.
 *
 * @constructor
 * @param {*} obj value to wrap; stored in `_wrapped`
 */
var Wrapper = function (obj) {
  this._wrapped = obj;
};
/**
 * Helper function to continue chaining intermediate results.
 *
 * @param {*} obj intermediate result
 * @param {boolean} chain truthy when the caller is in chaining mode
 * @returns {*} `obj` wrapped again (with chaining switched on) when
 *          `chain` is truthy, `obj` itself otherwise
 */
var result = function (obj, chain) {
  if (chain) {
    return typogr(obj).chain();
  }
  return obj;
};
/**
 * A method to easily add functions to the OOP wrapper.
 *
 * The generated method calls `func` with `typogr` as `this`, the wrapped
 * value as first argument and any arguments given to the method after it.
 * The outcome goes through `result`, so chaining mode is preserved.
 *
 * @param {string} name method name to define on `Wrapper.prototype`
 * @param {Function} func function to expose as a method
 */
var addToWrapper = function (name, func) {
  Wrapper.prototype[name] = function () {
    var args = [this._wrapped].concat(Array.prototype.slice.call(arguments));
    return result(func.apply(typogr, args), this._chain);
  };
};
/**
 * Is a given value a function?
 *
 * Uses `typeof`, so plain objects that merely carry `call` / `apply`
 * properties are not mistaken for functions.
 *
 * @param {*} obj value to test
 * @returns {boolean} true when `obj` is callable
 */
var isFunction = function (obj) {
  return typeof obj === 'function';
};
// Add all of the typogr functions to the wrapper object.
// Only own properties of `typogr` that hold functions are exposed (so
// `typogr.version`, a string, is skipped). `hasOwnProperty` is taken from
// Object.prototype so a property of that name on `typogr` cannot shadow it.
var name;
for (name in typogr) {
  if (Object.prototype.hasOwnProperty.call(typogr, name) && isFunction(typogr[name])) {
    addToWrapper(name, typogr[name]);
  }
}
/**
 * Start chaining a wrapped typogr object.
 *
 * Sets the `_chain` flag that the wrapper methods pass on to `result`.
 *
 * @returns {Wrapper} this wrapper
 */
Wrapper.prototype.chain = function () {
  this._chain = true;
  return this;
};
/**
 * Extracts the result from a wrapped and chained object.
 *
 * @returns {*} the value currently held in `_wrapped`
 */
Wrapper.prototype.value = function () {
  return this._wrapped;
};
- }(this));
|