| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 | 'use strict';module.exports = function (opts) {  var re = {};  opts = opts || {};  // Use direct extract instead of `regenerate` to reduse browserified size  re.src_Any = require('uc.micro/properties/Any/regex').source;  re.src_Cc  = require('uc.micro/categories/Cc/regex').source;  re.src_Z   = require('uc.micro/categories/Z/regex').source;  re.src_P   = require('uc.micro/categories/P/regex').source;  // \p{\Z\P\Cc\CF} (white spaces + control + format + punctuation)  re.src_ZPCc = [ re.src_Z, re.src_P, re.src_Cc ].join('|');  // \p{\Z\Cc} (white spaces + control)  re.src_ZCc = [ re.src_Z, re.src_Cc ].join('|');  // Experimental. List of chars, completely prohibited in links  // because can separate it from other part of text  var text_separators = '[><\uff5c]';  // All possible word characters (everything without punctuation, spaces & controls)  // Defined via punctuation & spaces to save space  // Should be something like \p{\L\N\S\M} (\w but without `_`)  re.src_pseudo_letter       = '(?:(?!' + text_separators + '|' + re.src_ZPCc + ')' + re.src_Any + ')';  // The same as abothe but without [0-9]  // var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')';  ////////////////////////////////////////////////////////////////////////////////  re.src_ip4 =    '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';  // Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch.  re.src_auth    = '(?:(?:(?!' + re.src_ZCc + '|[@/\\[\\]()]).)+@)?';  re.src_port =    '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?';  re.src_host_terminator =    '(?=$|' + text_separators + '|' + re.src_ZPCc + ')' +    '(?!' + (opts['---'] ? '-(?!--)|' : '-|') + '_|:\\d|\\.-|\\.(?!$|' + re.src_ZPCc + '))';  re.src_path =    '(?:' +      '[/?#]' +        '(?:' +          '(?!' + re.src_ZCc + '|' + text_separators + '|[()[\\]{}.,"\'?!\\-;]).|' +          '\\[(?:(?!' + re.src_ZCc + '|\\]).)*\\]|' +          '\\((?:(?!' + re.src_ZCc + '|[)]).)*\\)|' +          '\\{(?:(?!' + re.src_ZCc + '|[}]).)*\\}|' +          '\\"(?:(?!' + re.src_ZCc + '|["]).)+\\"|' +          "\\'(?:(?!" + re.src_ZCc + "|[']).)+\\'|" +          "\\'(?=" + re.src_pseudo_letter + '|[-])|' +  // allow `I'm_king` if no pair found          '\\.{2,}[a-zA-Z0-9%/&]|' + // google has many dots in "google search" links (#66, #81).                                     // github has ... in commit range links,                                     // Restrict to                                     // - english                                     // - percent-encoded                                     // - parts of file path                                     // - params separator                                     // until more examples found.          '\\.(?!' + re.src_ZCc + '|[.]|$)|' +          (opts['---'] ?            '\\-(?!--(?:[^-]|$))(?:-*)|' // `---` => long dash, terminate            :            '\\-+|'          ) +          ',(?!' + re.src_ZCc + '|$)|' +       // allow `,,,` in paths          ';(?!' + re.src_ZCc + '|$)|' +       // allow `;` if not followed by space-like char          '\\!+(?!' + re.src_ZCc + '|[!]|$)|' +  // allow `!!!` in paths, but not at the end          '\\?(?!' + re.src_ZCc + '|[?]|$)' +        ')+' +      '|\\/' +    ')?';  // Allow anything in markdown spec, forbid quote (") at the first position  // because emails enclosed in quotes are far more common  re.src_email_name =    '[\\-;:&=\\+\\$,\\.a-zA-Z0-9_][\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]*';  re.src_xn =    'xn--[a-z0-9\\-]{1,59}';  // More to read about domain names  // http://serverfault.com/questions/638260/  re.src_domain_root =    // Allow letters & digits (http://test1)    '(?:' +      re.src_xn +      '|' +      re.src_pseudo_letter + '{1,63}' +    ')';  re.src_domain =    '(?:' +      re.src_xn +      '|' +      '(?:' + re.src_pseudo_letter + ')' +      '|' +      '(?:' + re.src_pseudo_letter + '(?:-|' + re.src_pseudo_letter + '){0,61}' + re.src_pseudo_letter + ')' +    ')';  re.src_host =    '(?:' +    // Don't need IP check, because digits are already allowed in normal domain names    //   src_ip4 +    // '|' +      '(?:(?:(?:' + re.src_domain + ')\\.)*' + re.src_domain/*_root*/ + ')' +    ')';  re.tpl_host_fuzzy =    '(?:' +      re.src_ip4 +    '|' +      '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))' +    ')';  re.tpl_host_no_ip_fuzzy =    '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))';  re.src_host_strict =    re.src_host + re.src_host_terminator;  re.tpl_host_fuzzy_strict =    re.tpl_host_fuzzy + re.src_host_terminator;  re.src_host_port_strict =    re.src_host + re.src_port + re.src_host_terminator;  re.tpl_host_port_fuzzy_strict =    re.tpl_host_fuzzy + re.src_port + re.src_host_terminator;  re.tpl_host_port_no_ip_fuzzy_strict =    re.tpl_host_no_ip_fuzzy + re.src_port + re.src_host_terminator;  ////////////////////////////////////////////////////////////////////////////////  // Main rules  // Rude test fuzzy links by host, for quick deny  re.tpl_host_fuzzy_test =    'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + re.src_ZPCc + '|>|$))';  re.tpl_email_fuzzy =      '(^|' + text_separators + '|"|\\(|' + re.src_ZCc + ')' +      '(' + re.src_email_name + '@' + re.tpl_host_fuzzy_strict + ')';  re.tpl_link_fuzzy =      // Fuzzy link can't be prepended with .:/\- and non punctuation.      // but can start with > (markdown blockquote)      '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + re.src_ZPCc + '))' +      '((?![$+<=>^`|\uff5c])' + re.tpl_host_port_fuzzy_strict + re.src_path + ')';  re.tpl_link_no_ip_fuzzy =      // Fuzzy link can't be prepended with .:/\- and non punctuation.      // but can start with > (markdown blockquote)      '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + re.src_ZPCc + '))' +      '((?![$+<=>^`|\uff5c])' + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ')';  return re;};
 |