index.js 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  // Matches the dashes and spaces that are stripped from encoding names before
  // lookup, so that e.g. "UTF-8" and "utf 8" are both looked up as "utf8".
  var RE_SPACEDASH = /[- ]/g;

  // Module exports
  var iconv = module.exports = {
    // Convert `str` using the codec registered for `encoding`.
    // Throws (from getCodec) when the encoding is not recognized.
    toEncoding: function(str, encoding) {
      return iconv.getCodec(encoding).toEncoding(str);
    },

    // Convert `buf` back using the codec registered for `encoding`.
    // Throws (from getCodec) when the encoding is not recognized.
    fromEncoding: function(buf, encoding) {
      return iconv.getCodec(encoding).fromEncoding(buf);
    },

    // Tell whether `enc` (after dash/space stripping and lower-casing) is a
    // registered encoding name.
    encodingExists: function(enc) {
      loadEncodings();
      enc = enc.replace(RE_SPACEDASH, "").toLowerCase();
      // Own-property check: names inherited from Object.prototype
      // ("constructor", "tostring", ...) are not encodings.
      return Object.prototype.hasOwnProperty.call(iconv.encodings, enc) &&
        iconv.encodings[enc] !== undefined;
    },

    // Replacement characters used by the codecs for unmappable input.
    defaultCharUnicode: '�',
    defaultCharSingleByte: '?',

    // Set to true by loadEncodings() once the encoding files have been merged
    // into `encodings`.
    encodingsLoaded: false,

    // Get correct codec for given encoding.
    // `encoding` defaults to "utf8" when falsy. The entry found under the name
    // is followed until a codec factory (a function) is reached:
    //   - a string entry is an alias; the factory then receives
    //     {originalEncoding: <the alias name that was looked up>};
    //   - an object entry with a `type` is an options object for the codec
    //     named by `type`;
    //   - a function entry is the factory and is called with the options
    //     gathered so far.
    // Throws Error when a name does not resolve to any of the above.
    getCodec: function(encoding) {
      loadEncodings();
      var enc = encoding || "utf8";
      var codecOptions = undefined;
      while (1) {
        if (getType(enc) === "String")
          enc = enc.replace(RE_SPACEDASH, "").toLowerCase();

        // Only own properties count; otherwise a name such as "constructor"
        // would resolve to a function inherited from Object.prototype and be
        // returned as if it were a codec.
        var codec = Object.prototype.hasOwnProperty.call(iconv.encodings, enc)
          ? iconv.encodings[enc]
          : undefined;
        var type = getType(codec);

        if (type === "String") {
          // Link to other encoding.
          codecOptions = {originalEncoding: enc};
          enc = codec;
        }
        else if (type === "Object" && codec.type != undefined) {
          // Options for other encoding.
          codecOptions = codec;
          enc = codec.type;
        }
        else if (type === "Function")
          // Codec itself.
          return codec(codecOptions);
        else
          throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
      }
    },

    // Define basic encodings
    encodings: {
      // Codec that passes `options.originalEncoding` to Buffer for the
      // actual conversion.
      internal: function(options) {
        return {
          toEncoding: toInternalEncoding,
          fromEncoding: fromInternalEncoding,
          options: options
        };
      },
      utf8: "internal",
      ucs2: "internal",
      binary: "internal",
      ascii: "internal",
      base64: "internal",

      // Codepage single-byte encodings.
      // `options.chars` holds the characters of the code page: either all 256,
      // or only the upper 128 (the ASCII half is then prepended). The lookup
      // buffers are built once and cached on `options`.
      singlebyte: function(options) {
        // Prepare chars if needed
        if (!options.charsBuf) {
          if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
            throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");

          if (options.chars.length === 128)
            options.chars = asciiString + options.chars;

          // Decoding table: the UCS-2 bytes of the character for byte value b
          // start at offset 2*b.
          options.charsBuf = new Buffer(options.chars, 'ucs2');
        }

        if (!options.revCharsBuf) {
          // Encoding table: one byte per UTF-16 code unit, pre-filled with the
          // default single-byte character for everything that is not mapped.
          options.revCharsBuf = new Buffer(65536);
          var defChar = iconv.defaultCharSingleByte.charCodeAt(0);
          for (var i = 0; i < options.revCharsBuf.length; i++)
            options.revCharsBuf[i] = defChar;
          for (var i = 0; i < options.chars.length; i++)
            options.revCharsBuf[options.chars.charCodeAt(i)] = i;
        }

        return {
          toEncoding: toSingleByteEncoding,
          fromEncoding: fromSingleByteEncoding,
          options: options,
        };
      },

      // Codepage double-byte encodings.
      // `options.table` maps an encoded value to a UTF-16 code unit; the
      // reverse table is built once and cached on `options`.
      table: function(options) {
        if (!options.table) {
          throw new Error("Encoding '" + options.type + "' has incorrect 'table' option");
        }
        if (!options.revCharsTable) {
          var revCharsTable = options.revCharsTable = {};
          // 0 marks "no mapping" for every code unit until the table fills it in.
          for (var i = 0; i <= 0xFFFF; i++) {
            revCharsTable[i] = 0;
          }
          var table = options.table;
          for (var key in table) {
            revCharsTable[table[key]] = +key;
          }
        }

        return {
          toEncoding: toTableEncoding,
          fromEncoding: fromTableEncoding,
          options: options,
        };
      }
    }
  };
  // Encoder for the "internal" codec: coerces the input to a string and builds
  // a Buffer from it using the encoding name stored in
  // this.options.originalEncoding. Called with the codec object as `this`.
  function toInternalEncoding(str) {
    var text = ensureString(str);
    var nativeEncoding = this.options.originalEncoding;
    return new Buffer(text, nativeEncoding);
  }
  // Decoder for the "internal" codec: coerces the input to a Buffer and
  // stringifies it using the encoding name stored in
  // this.options.originalEncoding. Called with the codec object as `this`.
  function fromInternalEncoding(buf) {
    var bytes = ensureBuffer(buf);
    var nativeEncoding = this.options.originalEncoding;
    return bytes.toString(nativeEncoding);
  }
  // Encoder for "table" codecs. Called with the codec object as `this`.
  // Code units below 0x80 are written as a single byte; every other code unit
  // is looked up in this.options.revCharsTable and written as two bytes, high
  // byte first. A falsy lookup result is replaced by the table entry for
  // iconv.defaultCharUnicode.
  function toTableEncoding(str) {
    var text = ensureString(str);
    var total = text.length;
    var reverseTable = this.options.revCharsTable;
    // Worst case is two bytes per code unit; the result is trimmed at the end.
    var out = new Buffer(total * 2);
    var fallbackCode = reverseTable[iconv.defaultCharUnicode.charCodeAt(0)];
    var written = 0;

    for (var pos = 0; pos < total; pos++) {
      var codeUnit = text.charCodeAt(pos);

      if (!(codeUnit >> 7)) {
        // ASCII range: copied through unchanged.
        out[written++] = codeUnit;
        continue;
      }

      var encoded = reverseTable[codeUnit] || fallbackCode;
      out[written] = encoded >> 8;       // high byte
      out[written + 1] = encoded & 0xFF; // low byte
      written += 2;
    }

    return out.slice(0, written);
  }
  // Decoder for "table" codecs. Called with the codec object as `this`.
  // A byte with the high bit clear is taken as the code unit itself; a byte
  // with the high bit set is combined with the following byte and looked up in
  // this.options.table, falling back to iconv.defaultCharUnicode when the
  // lookup is falsy. Code units are collected little-endian in a scratch
  // buffer that is finally read as 'ucs2'.
  function fromTableEncoding(buf) {
    var input = ensureBuffer(buf);
    var total = input.length;
    var codeTable = this.options.table;
    // At most one code unit (2 bytes) is produced per input byte.
    var out = new Buffer(total * 2);
    var fallback = iconv.defaultCharUnicode.charCodeAt(0);
    var readPos = 0;
    var writePos = 0;

    while (readPos < total) {
      var lead = input[readPos++];
      var codeUnit;

      if (lead & 0x80) {
        // Two-byte sequence: the lead byte is the high half of the table key.
        var key = (lead << 8) + input[readPos++];
        codeUnit = codeTable[key] || fallback;
      } else {
        codeUnit = lead;
      }

      out[writePos++] = codeUnit & 0xFF; // low byte
      out[writePos++] = codeUnit >> 8;   // high byte
    }

    return out.slice(0, writePos).toString('ucs2');
  }
  // Encoder for "singlebyte" codecs. Called with the codec object as `this`.
  // Produces one byte per UTF-16 code unit by indexing
  // this.options.revCharsBuf with the code unit.
  function toSingleByteEncoding(str) {
    var text = ensureString(str);
    var total = text.length;
    var out = new Buffer(total);
    var reverseLookup = this.options.revCharsBuf;

    var pos = 0;
    while (pos < total) {
      out[pos] = reverseLookup[text.charCodeAt(pos)];
      pos++;
    }

    return out;
  }
  // Decoder for "singlebyte" codecs. Called with the codec object as `this`.
  // Each input byte b selects the two bytes at offset 2*b of
  // this.options.charsBuf; they are copied into a scratch buffer that is
  // finally read as 'ucs2'.
  function fromSingleByteEncoding(buf) {
    var input = ensureBuffer(buf);
    // JS strings are immutable, so the result is assembled in a ucs2 buffer.
    var lookup = this.options.charsBuf;
    var total = input.length;
    var out = new Buffer(total * 2);

    for (var src = 0, dst = 0; src < total; src++, dst += 2) {
      var offset = input[src] * 2;
      out[dst] = lookup[offset];
      out[dst + 1] = lookup[offset + 1];
    }

    return out.toString('ucs2');
  }
  // Shorter public names for the two conversion functions.
  iconv.decode = iconv.fromEncoding;
  iconv.encode = iconv.toEncoding;
  // Merge the encoding definitions from the files in ./encodings into
  // iconv.encodings. Does the work once; later calls return immediately
  // because iconv.encodingsLoaded is set.
  function loadEncodings() {
    if (iconv.encodingsLoaded)
      return;

    var definitionSets = [
      require('./encodings/singlebyte'),
      require('./encodings/gbk'),
      require('./encodings/big5')
    ];

    for (var n = 0; n < definitionSets.length; n++) {
      var definitions = definitionSets[n];
      for (var name in definitions)
        iconv.encodings[name] = definitions[name];
    }

    iconv.encodingsLoaded = true;
  }
  // Utilities

  // The 128 characters U+0000..U+007F in code order. The "singlebyte" codec
  // prepends it to code pages that only list their upper 128 characters.
  var asciiString = (function() {
    var chars = [];
    for (var code = 0; code < 128; code++)
      chars.push(String.fromCharCode(code));
    return chars.join('');
  })();
  // Coerce `buf` to a Buffer.
  //   - null / undefined  -> empty Buffer
  //   - a Buffer          -> returned as is (same object)
  //   - anything else     -> stringified and read as 'binary'
  // Only null/undefined count as "no input"; other falsy values such as 0 or
  // false are stringified like any other non-Buffer value instead of being
  // silently replaced by an empty Buffer.
  var ensureBuffer = function(buf) {
    if (buf === null || buf === undefined)
      return new Buffer(0);
    return (buf instanceof Buffer) ? buf : new Buffer(""+buf, "binary");
  };
  // Coerce `str` to a string.
  //   - null / undefined  -> ""
  //   - a Buffer          -> decoded as UTF-8
  //   - anything else     -> stringified
  // Only null/undefined count as "no input"; other falsy values such as 0 or
  // false are stringified ("0", "false") instead of being silently turned
  // into "".
  var ensureString = function(str) {
    if (str === null || str === undefined)
      return "";
    return (str instanceof Buffer) ? str.toString('utf8') : (""+str);
  };
  // Return the internal class name of `obj` ("String", "Object", "Function",
  // "Undefined", ...), i.e. the Xxx part of the "[object Xxx]" tag produced by
  // Object.prototype.toString.
  var getType = function(obj) {
    var tag = Object.prototype.toString.call(obj);
    // Drop the leading "[object " (8 characters) and the trailing "]".
    return tag.substring(8, tag.length - 1);
  };