unicode.js 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. 'use strict';
  2. const UNDEFINED_CODE_POINTS = [
  3. 0xfffe,
  4. 0xffff,
  5. 0x1fffe,
  6. 0x1ffff,
  7. 0x2fffe,
  8. 0x2ffff,
  9. 0x3fffe,
  10. 0x3ffff,
  11. 0x4fffe,
  12. 0x4ffff,
  13. 0x5fffe,
  14. 0x5ffff,
  15. 0x6fffe,
  16. 0x6ffff,
  17. 0x7fffe,
  18. 0x7ffff,
  19. 0x8fffe,
  20. 0x8ffff,
  21. 0x9fffe,
  22. 0x9ffff,
  23. 0xafffe,
  24. 0xaffff,
  25. 0xbfffe,
  26. 0xbffff,
  27. 0xcfffe,
  28. 0xcffff,
  29. 0xdfffe,
  30. 0xdffff,
  31. 0xefffe,
  32. 0xeffff,
  33. 0xffffe,
  34. 0xfffff,
  35. 0x10fffe,
  36. 0x10ffff
  37. ];
  38. exports.REPLACEMENT_CHARACTER = '\uFFFD';
  39. exports.CODE_POINTS = {
  40. EOF: -1,
  41. NULL: 0x00,
  42. TABULATION: 0x09,
  43. CARRIAGE_RETURN: 0x0d,
  44. LINE_FEED: 0x0a,
  45. FORM_FEED: 0x0c,
  46. SPACE: 0x20,
  47. EXCLAMATION_MARK: 0x21,
  48. QUOTATION_MARK: 0x22,
  49. NUMBER_SIGN: 0x23,
  50. AMPERSAND: 0x26,
  51. APOSTROPHE: 0x27,
  52. HYPHEN_MINUS: 0x2d,
  53. SOLIDUS: 0x2f,
  54. DIGIT_0: 0x30,
  55. DIGIT_9: 0x39,
  56. SEMICOLON: 0x3b,
  57. LESS_THAN_SIGN: 0x3c,
  58. EQUALS_SIGN: 0x3d,
  59. GREATER_THAN_SIGN: 0x3e,
  60. QUESTION_MARK: 0x3f,
  61. LATIN_CAPITAL_A: 0x41,
  62. LATIN_CAPITAL_F: 0x46,
  63. LATIN_CAPITAL_X: 0x58,
  64. LATIN_CAPITAL_Z: 0x5a,
  65. RIGHT_SQUARE_BRACKET: 0x5d,
  66. GRAVE_ACCENT: 0x60,
  67. LATIN_SMALL_A: 0x61,
  68. LATIN_SMALL_F: 0x66,
  69. LATIN_SMALL_X: 0x78,
  70. LATIN_SMALL_Z: 0x7a,
  71. REPLACEMENT_CHARACTER: 0xfffd
  72. };
  73. exports.CODE_POINT_SEQUENCES = {
  74. DASH_DASH_STRING: [0x2d, 0x2d], //--
  75. DOCTYPE_STRING: [0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE
  76. CDATA_START_STRING: [0x5b, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5b], //[CDATA[
  77. SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script
  78. PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4c, 0x49, 0x43], //PUBLIC
  79. SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4d] //SYSTEM
  80. };
// Surrogates (values in the range 0xD800-0xDFFF)
  82. exports.isSurrogate = function(cp) {
  83. return cp >= 0xd800 && cp <= 0xdfff;
  84. };
  85. exports.isSurrogatePair = function(cp) {
  86. return cp >= 0xdc00 && cp <= 0xdfff;
  87. };
  88. exports.getSurrogatePairCodePoint = function(cp1, cp2) {
  89. return (cp1 - 0xd800) * 0x400 + 0x2400 + cp2;
  90. };
  91. //NOTE: excluding NULL and ASCII whitespace
  92. exports.isControlCodePoint = function(cp) {
  93. return (
  94. (cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
  95. (cp >= 0x7f && cp <= 0x9f)
  96. );
  97. };
  98. exports.isUndefinedCodePoint = function(cp) {
  99. return (cp >= 0xfdd0 && cp <= 0xfdef) || UNDEFINED_CODE_POINTS.indexOf(cp) > -1;
  100. };