123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664 |
- 'use strict';
- ////////////////////////////////////////////////////////////////////////////////
- // Helpers
// Merge objects.
//
// Copies the own enumerable properties of every source (left to right) onto
// `obj` and returns `obj` itself. Falsy sources are skipped; later sources
// overwrite keys set by earlier ones. The copy is shallow.
//
function assign(obj /*from1, from2, from3, ...*/) {
  var argIdx, keyIdx, source, keys;

  for (argIdx = 1; argIdx < arguments.length; argIdx++) {
    source = arguments[argIdx];

    if (!source) { continue; }

    keys = Object.keys(source);
    for (keyIdx = 0; keyIdx < keys.length; keyIdx++) {
      obj[keys[keyIdx]] = source[keys[keyIdx]];
    }
  }

  return obj;
}
// Returns the internal type tag of a value, e.g. '[object RegExp]'.
function _class(obj) {
  return Object.prototype.toString.call(obj);
}

// True for string primitives and String wrapper objects.
function isString(obj) {
  return _class(obj) === '[object String]';
}

// True for plain objects (arrays, functions, regexps and null are rejected).
function isObject(obj) {
  return _class(obj) === '[object Object]';
}

// True for RegExp instances.
function isRegExp(obj) {
  return _class(obj) === '[object RegExp]';
}

// True for values whose type tag is exactly '[object Function]'.
function isFunction(obj) {
  return _class(obj) === '[object Function]';
}
// Backslash-escapes every RegExp metacharacter in `str`, so the result can be
// embedded into a pattern and match `str` literally.
function escapeRE(str) {
  var metaChars = /[.?*+^$[\]\\(){}|-]/g;

  return str.replace(metaChars, '\\$&');
}
- ////////////////////////////////////////////////////////////////////////////////
// Recognition options applied when the caller does not override them.
var defaultOptions = {
  fuzzyLink: true,
  fuzzyEmail: true,
  fuzzyIP: false
};

// Tells whether `obj` looks like an options object: true as soon as one of
// its own enumerable keys is a known option name. `null` / `undefined` are
// treated as an empty object.
function isOptionsObj(obj) {
  var candidate = obj || {};

  return Object.keys(candidate).some(function (key) {
    return defaultOptions.hasOwnProperty(key);
  });
}
// Built-in schemas.
//
// Each key is a link prefix. A string value is an alias to another key; an
// object value carries `validate(text, pos, self)`, which returns the length
// of the link tail found at `pos` (right after the prefix) or 0 when the tail
// is not acceptable.
//
// Validators build their regexps from `self.re.src_*` parts on first use and
// store them back on `self.re`. Every regexp is executed once per call (a
// single `match` instead of `test` followed by `match`).
var defaultSchemas = {
  'http:': {
    validate: function (text, pos, self) {
      var tail = text.slice(pos);
      var found;

      if (!self.re.http) {
        // compile lazily, because "host"-containing variables can change on tlds update.
        self.re.http = new RegExp(
          '^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i'
        );
      }

      found = tail.match(self.re.http);
      return found ? found[0].length : 0;
    }
  },
  'https:': 'http:',
  'ftp:': 'http:',
  '//': {
    validate: function (text, pos, self) {
      var tail = text.slice(pos);
      var found;

      if (!self.re.no_http) {
        // compile lazily, because "host"-containing variables can change on tlds update.
        self.re.no_http = new RegExp(
          '^' +
          self.re.src_auth +
          // Don't allow single-level domains, because of false positives like '//test'
          // with code comments
          '(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')' +
          self.re.src_port +
          self.re.src_host_terminator +
          self.re.src_path,
          'i'
        );
      }

      found = tail.match(self.re.no_http);
      if (!found) { return 0; }

      // should not be `://` & `///`, that protects from errors in protocol name
      if (pos >= 3 && (text[pos - 3] === ':' || text[pos - 3] === '/')) { return 0; }

      return found[0].length;
    }
  },
  'mailto:': {
    validate: function (text, pos, self) {
      var tail = text.slice(pos);
      var found;

      if (!self.re.mailto) {
        self.re.mailto = new RegExp(
          '^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i'
        );
      }

      found = tail.match(self.re.mailto);
      return found ? found[0].length : 0;
    }
  }
};
/*eslint-disable max-len*/

// RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)
var tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]';

// Root zones accepted for fuzzy links out of the box.
// DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
var tlds_default = [
  'biz', 'com', 'edu', 'gov', 'net', 'org', 'pro', 'web', 'xxx',
  'aero', 'asia', 'coop', 'info', 'museum', 'name', 'shop', 'рф'
];

/*eslint-enable max-len*/
- ////////////////////////////////////////////////////////////////////////////////
// Drops the cached result of the previous scan: forgets the scanned text and
// marks the match position as "nothing found" (-1).
function resetScanCache(self) {
  self.__text_cache__ = '';
  self.__index__ = -1;
}
// Wraps a RegExp into a schema validator.
//
// The returned function applies `re` to the part of `text` starting at `pos`
// and returns the length of the first match, or 0 when nothing matches.
//
// The regexp is executed once per call. Global and sticky regexps keep state
// in `lastIndex`, so it is rewound around the call; otherwise a sticky regexp
// would start at the wrong offset and repeated calls would not give the same
// answer. Regexps without those flags are left untouched.
function createValidator(re) {
  return function (text, pos) {
    var stateful = re.global || re.sticky;
    var found;

    if (stateful) { re.lastIndex = 0; }

    found = re.exec(text.slice(pos));

    if (stateful) { re.lastIndex = 0; }

    return found ? found[0].length : 0;
  };
}
// Builds the fallback normalizer for schemas that do not define their own:
// it hands the match over to the `normalize` method of the linkifier instance.
function createNormalizer() {
  function delegateToInstance(match, self) {
    self.normalize(match);
  }

  return delegateToInstance;
}
- // Schemas compiler. Build regexps.
- //
- function compile(self) {
- // Load & clone RE patterns.
- var re = self.re = require('./lib/re')(self.__opts__);
- // Define dynamic patterns
- var tlds = self.__tlds__.slice();
- self.onCompile();
- if (!self.__tlds_replaced__) {
- tlds.push(tlds_2ch_src_re);
- }
- tlds.push(re.src_xn);
- re.src_tlds = tlds.join('|');
- function untpl(tpl) { return tpl.replace('%TLDS%', re.src_tlds); }
- re.email_fuzzy = RegExp(untpl(re.tpl_email_fuzzy), 'i');
- re.link_fuzzy = RegExp(untpl(re.tpl_link_fuzzy), 'i');
- re.link_no_ip_fuzzy = RegExp(untpl(re.tpl_link_no_ip_fuzzy), 'i');
- re.host_fuzzy_test = RegExp(untpl(re.tpl_host_fuzzy_test), 'i');
- //
- // Compile each schema
- //
- var aliases = [];
- self.__compiled__ = {}; // Reset compiled data
- function schemaError(name, val) {
- throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val);
- }
- Object.keys(self.__schemas__).forEach(function (name) {
- var val = self.__schemas__[name];
- // skip disabled methods
- if (val === null) { return; }
- var compiled = { validate: null, link: null };
- self.__compiled__[name] = compiled;
- if (isObject(val)) {
- if (isRegExp(val.validate)) {
- compiled.validate = createValidator(val.validate);
- } else if (isFunction(val.validate)) {
- compiled.validate = val.validate;
- } else {
- schemaError(name, val);
- }
- if (isFunction(val.normalize)) {
- compiled.normalize = val.normalize;
- } else if (!val.normalize) {
- compiled.normalize = createNormalizer();
- } else {
- schemaError(name, val);
- }
- return;
- }
- if (isString(val)) {
- aliases.push(name);
- return;
- }
- schemaError(name, val);
- });
- //
- // Compile postponed aliases
- //
- aliases.forEach(function (alias) {
- if (!self.__compiled__[self.__schemas__[alias]]) {
- // Silently fail on missed schemas to avoid errons on disable.
- // schemaError(alias, self.__schemas__[alias]);
- return;
- }
- self.__compiled__[alias].validate =
- self.__compiled__[self.__schemas__[alias]].validate;
- self.__compiled__[alias].normalize =
- self.__compiled__[self.__schemas__[alias]].normalize;
- });
- //
- // Fake record for guessed links
- //
- self.__compiled__[''] = { validate: null, normalize: createNormalizer() };
- //
- // Build schema condition
- //
- var slist = Object.keys(self.__compiled__)
- .filter(function (name) {
- // Filter disabled & fake schemas
- return name.length > 0 && self.__compiled__[name];
- })
- .map(escapeRE)
- .join('|');
- // (?!_) cause 1.5x slowdown
- self.re.schema_test = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + slist + ')', 'i');
- self.re.schema_search = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + slist + ')', 'ig');
- self.re.schema_at_start = RegExp('^' + self.re.schema_search.source, 'i');
- self.re.pretest = RegExp(
- '(' + self.re.schema_test.source + ')|(' + self.re.host_fuzzy_test.source + ')|@',
- 'i'
- );
- //
- // Cleanup
- //
- resetScanCache(self);
- }
/**
 * class Match
 *
 * Match result. Single element of array, returned by [[LinkifyIt#match]]
 *
 * Built from the scan state stored on the linkifier (`__index__`,
 * `__last_index__`, `__text_cache__`, `__schema__`). `shift` is added to both
 * positions, so they can be reported relative to a longer original string.
 **/
function Match(self, shift) {
  var from = self.__index__;
  var to = self.__last_index__;
  var matched = self.__text_cache__.slice(from, to);

  /**
   * Match#schema -> String
   *
   * Prefix (protocol) for matched string, lowercased.
   **/
  this.schema = self.__schema__.toLowerCase();

  /**
   * Match#index -> Number
   *
   * First position of matched string.
   **/
  this.index = from + shift;

  /**
   * Match#lastIndex -> Number
   *
   * Next position after matched string.
   **/
  this.lastIndex = to + shift;

  /**
   * Match#raw -> String
   *
   * Matched string.
   **/
  this.raw = matched;

  /**
   * Match#text -> String
   *
   * Normalized text of matched string (initially the same as `raw`).
   **/
  this.text = matched;

  /**
   * Match#url -> String
   *
   * Normalized url of matched string (initially the same as `raw`).
   **/
  this.url = matched;
}
// Builds a Match from the current scan state of `self` and passes it through
// the normalizer registered for the match's (lowercased) schema.
function createMatch(self, shift) {
  var result = new Match(self, shift);
  var rule = self.__compiled__[result.schema];

  rule.normalize(result, self);

  return result;
}
/**
 * class LinkifyIt
 **/

/**
 * new LinkifyIt(schemas, options)
 * - schemas (Object): Optional. Additional schemas to validate (prefix/validator)
 * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
 *
 * Creates new linkifier instance with optional additional schemas.
 * Can be called without `new` keyword for convenience.
 *
 * When `options` is omitted and the first argument contains at least one known
 * option key, the first argument is taken as `options`.
 *
 * By default understands:
 *
 * - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
 * - "fuzzy" links and emails (example.com, foo@bar.com).
 *
 * `schemas` is an object, where each key/value describes protocol/rule:
 *
 * - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
 *   for example). `linkify-it` makes sure that prefix is not preceded with
 *   alphanumeric char and symbols. Only whitespaces and punctuation allowed.
 * - __value__ - rule to check tail after link prefix
 *   - _String_ - just alias to existing rule
 *   - _Object_
 *     - _validate_ - validator function (should return matched length on success),
 *       or `RegExp`.
 *     - _normalize_ - optional function to normalize text & url of matched result
 *       (for example, for @twitter mentions).
 *
 * `options`:
 *
 * - __fuzzyLink__ - recognize URL-s without `http(s):` prefix. Default `true`.
 * - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
 *   like version numbers. Default `false`.
 * - __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
 *
 **/
function LinkifyIt(schemas, options) {
  if (!(this instanceof LinkifyIt)) {
    return new LinkifyIt(schemas, options);
  }

  // Single-argument form: `LinkifyIt(options)`.
  if (!options && isOptionsObj(schemas)) {
    options = schemas;
    schemas = {};
  }

  this.__opts__ = assign({}, defaultOptions, options);
  this.__schemas__ = assign({}, defaultSchemas, schemas);

  // Cache last tested result. Used to skip repeating steps on next `match` call.
  this.__index__ = -1;
  this.__last_index__ = -1; // Next scan position
  this.__schema__ = '';
  this.__text_cache__ = '';

  this.__compiled__ = {};

  this.__tlds__ = tlds_default;
  this.__tlds_replaced__ = false;

  this.re = {};

  compile(this);
}
/** chainable
 * LinkifyIt#add(schema, definition)
 * - schema (String): rule name (fixed pattern prefix)
 * - definition (String|Object|null): schema definition; a string is an alias
 *   to another rule, `null` disables the rule
 *
 * Add new rule definition and recompile. See constructor description for
 * details. Throws if the definition is invalid.
 **/
LinkifyIt.prototype.add = function add(schema, definition) {
  var rules = this.__schemas__;

  rules[schema] = definition;
  compile(this);

  return this;
};
/** chainable
 * LinkifyIt#set(options)
 * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
 *
 * Set recognition options for links without schema.
 *
 * The instance is recompiled afterwards: the compiler hands the options to
 * the pattern factory, and it also drops the cached scan result, which was
 * computed under the previous options.
 **/
LinkifyIt.prototype.set = function set(options) {
  this.__opts__ = assign(this.__opts__, options);
  compile(this);
  return this;
};
/**
 * LinkifyIt#test(text) -> Boolean
 *
 * Searches linkifiable pattern and returns `true` on success or `false` on fail.
 *
 * The earliest candidate among schema-prefixed links, fuzzy links and fuzzy
 * emails wins. Its schema and position are stored on the instance
 * (`__schema__`, `__index__`, `__last_index__`) for a following `match` call.
 **/
LinkifyIt.prototype.test = function test(text) {
  var found, tailLen, search, tldPos, fuzzyRe, start, end;

  // Reset scan cache
  this.__text_cache__ = text;
  this.__index__ = -1;

  if (!text.length) { return false; }

  // try to scan for link with schema - that's the most simple rule
  if (this.re.schema_test.test(text)) {
    search = this.re.schema_search;
    search.lastIndex = 0;

    for (found = search.exec(text); found !== null; found = search.exec(text)) {
      tailLen = this.testSchemaAt(text, found[2], search.lastIndex);
      if (!tailLen) { continue; }

      // first prefix with a valid tail wins
      this.__schema__ = found[2];
      this.__index__ = found.index + found[1].length;
      this.__last_index__ = found.index + found[0].length + tailLen;
      break;
    }
  }

  if (this.__opts__.fuzzyLink && this.__compiled__['http:']) {
    // guess schemaless links
    tldPos = text.search(this.re.host_fuzzy_test);

    // if tld is located after found link - no need to check fuzzy pattern
    if (tldPos >= 0 && (this.__index__ < 0 || tldPos < this.__index__)) {
      fuzzyRe = this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy;
      found = text.match(fuzzyRe);

      if (found !== null) {
        start = found.index + found[1].length;

        if (this.__index__ < 0 || start < this.__index__) {
          this.__schema__ = '';
          this.__index__ = start;
          this.__last_index__ = found.index + found[0].length;
        }
      }
    }
  }

  if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:'] && text.indexOf('@') >= 0) {
    // guess schemaless emails
    //
    // We can't skip this check, because this cases are possible:
    // 192.168.1.1@gmail.com, my.in@example.com
    found = text.match(this.re.email_fuzzy);

    if (found !== null) {
      start = found.index + found[1].length;
      end = found.index + found[0].length;

      // earlier start wins; on equal start the longer match wins
      if (this.__index__ < 0 || start < this.__index__ ||
          (start === this.__index__ && end > this.__last_index__)) {
        this.__schema__ = 'mailto:';
        this.__index__ = start;
        this.__last_index__ = end;
      }
    }
  }

  return this.__index__ >= 0;
};
/**
 * LinkifyIt#pretest(text) -> Boolean
 *
 * Very quick check that can give false positives. Returns `true` if a link
 * MAY exist in the text. Can be used for speed optimization, when you need to
 * check that a link does NOT exist.
 **/
LinkifyIt.prototype.pretest = function pretest(text) {
  var quickCheck = this.re.pretest;

  return quickCheck.test(text);
};
/**
 * LinkifyIt#testSchemaAt(text, schema, pos) -> Number
 * - text (String): text to scan
 * - schema (String): rule (schema) name, matched case-insensitively
 * - pos (Number): text offset to check from
 *
 * Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly
 * at given position. Returns length of found pattern (0 on fail).
 **/
LinkifyIt.prototype.testSchemaAt = function testSchemaAt(text, schema, pos) {
  var rule = this.__compiled__[schema.toLowerCase()];

  // If not supported schema check requested - terminate
  if (!rule) { return 0; }

  return rule.validate(text, pos, this);
};
/**
 * LinkifyIt#match(text) -> Array|null
 *
 * Returns array of found link descriptions or `null` on fail. We strongly
 * recommend to use [[LinkifyIt#test]] first, for best speed.
 *
 * ##### Result match description
 *
 * - __schema__ - link schema, can be empty for fuzzy links, or `//` for
 *   protocol-neutral links.
 * - __index__ - offset of matched text
 * - __lastIndex__ - index of next char after match end
 * - __raw__ - matched text
 * - __text__ - normalized text
 * - __url__ - link, generated from matched text
 **/
LinkifyIt.prototype.match = function match(text) {
  var found = [];
  var offset = 0;
  var rest;

  // Try to take previous element from cache, if .test() called before
  if (this.__index__ >= 0 && this.__text_cache__ === text) {
    found.push(createMatch(this, offset));
    offset = this.__last_index__;
  }

  // Cut head if cache was used
  rest = offset ? text.slice(offset) : text;

  // Scan string until end reached; each pass continues right after the
  // previous match, `offset` maps positions back to the original text.
  while (this.test(rest)) {
    found.push(createMatch(this, offset));

    rest = rest.slice(this.__last_index__);
    offset += this.__last_index__;
  }

  return found.length ? found : null;
};
/**
 * LinkifyIt#matchAtStart(text) -> Match|null
 *
 * Returns fully-formed (not fuzzy) link if it starts at the beginning
 * of the string, and null otherwise.
 **/
LinkifyIt.prototype.matchAtStart = function matchAtStart(text) {
  var head, tailLen;

  // Reset scan cache
  this.__text_cache__ = text;
  this.__index__ = -1;

  if (!text.length) { return null; }

  head = this.re.schema_at_start.exec(text);
  if (!head) { return null; }

  // the schema tail must validate right after the matched prefix
  tailLen = this.testSchemaAt(text, head[2], head[0].length);
  if (!tailLen) { return null; }

  this.__schema__ = head[2];
  this.__index__ = head.index + head[1].length;
  this.__last_index__ = head.index + head[0].length + tailLen;

  return createMatch(this, 0);
};
/** chainable
 * LinkifyIt#tlds(list [, keepOld]) -> this
 * - list (Array): list of tlds (a single value is wrapped into an array)
 * - keepOld (Boolean): merge with current list if `true` (`false` by default)
 *
 * Load (or merge) new tlds list. Those are used for fuzzy links (without
 * prefix) to avoid false positives. By default this algorithm is used:
 *
 * - hostname with any 2-letter root zones are ok.
 * - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
 *   are ok.
 * - encoded (`xn--...`) root zones are ok.
 *
 * If list is replaced, then exact match for 2-chars root zones will be checked.
 **/
LinkifyIt.prototype.tlds = function tlds(list, keepOld) {
  var incoming = Array.isArray(list) ? list : [ list ];

  if (keepOld) {
    // Merge: sort, drop adjacent duplicates, then store in descending order.
    this.__tlds__ = this.__tlds__.concat(incoming)
      .sort()
      .filter(function (tld, pos, sorted) {
        return tld !== sorted[pos - 1];
      })
      .reverse();
  } else {
    // Replace: from now on the generic 2-letter pattern is not appended.
    this.__tlds__ = incoming.slice();
    this.__tlds_replaced__ = true;
  }

  compile(this);
  return this;
};
/**
 * LinkifyIt#normalize(match)
 *
 * Default normalizer (if schema does not define its own). Mutates `match.url`:
 *
 * - schemaless (fuzzy) links get the `http://` prefix;
 * - `mailto:` matches get the `mailto:` prefix unless already present.
 **/
LinkifyIt.prototype.normalize = function normalize(match) {
  // Do minimal possible changes by default. Need to collect feedback prior
  // to move forward https://github.com/markdown-it/linkify-it/issues/1

  if (!match.schema) {
    match.url = 'http://' + match.url;
  } else if (match.schema === 'mailto:' && !/^mailto:/i.test(match.url)) {
    match.url = 'mailto:' + match.url;
  }
};
/**
 * LinkifyIt#onCompile()
 *
 * Override to modify basic RegExp-s. The default implementation does nothing.
 **/
LinkifyIt.prototype.onCompile = function onCompile() {
  // intentionally empty: extension point
};


module.exports = LinkifyIt;
|