index.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. // @ts-check
  2. /* eslint no-use-before-define:0 */
  3. 'use strict'; // Import
  4. var pathUtil = require('path');
  5. var textExtensions = require('textextensions');
  6. var binaryExtensions = require('binaryextensions');
  /**
   * Will be `null` if `buffer` was not provided. Otherwise will be either `'utf8'` or `'binary'`.
   * @typedef {'utf8'|'binary'|null} EncodingResult
   */
  /**
   * Will be `null` if neither `filename` nor `buffer` were provided. Otherwise will be a boolean value with the detection result.
   * @typedef {boolean|null} TextOrBinaryResult
   */
  15. /**
  16. * @typedef {Object} EncodingOpts
  17. * @property {number} [chunkLength = 24]
  18. * @property {number} [chunkBegin = 0]
  19. */
  20. /**
  21. * @callback IsTextCallback
  22. * @param {Error?} error
  23. * @param {TextOrBinaryResult} [isTextResult]
  24. */
  25. /**
  26. * @callback IsBinaryCallback
  27. * @param {Error?} error
  28. * @param {TextOrBinaryResult} [isBinaryResult]
  29. */
  30. /**
  31. * @callback GetEncodingCallback
  32. * @param {Error?} error
  33. * @param {EncodingResult} [encoding]
  34. */
  /**
   * Determine if the filename and/or buffer is text.
   *
   * Extension checks run first (when a filename is available) because they are
   * quicker, and because encoding checks cannot guarantee accuracy for chars
   * between utf8 and utf16. Only when no extension is recognised (or there is no
   * filename) does this fall back to the slower buffer encoding detection.
   *
   * The extension lists come from https://github.com/bevry/textextensions and
   * https://github.com/bevry/binaryextensions
   *
   * In a later major release, this function will become {@link isText} so you should use that instead.
   *
   * @param {string} [filename] The filename for the file/buffer if available
   * @param {Buffer} [buffer] The buffer for the file if available
   * @returns {TextOrBinaryResult} `true` for text, `false` for binary, `null` when
   * no extension matched and no buffer was provided.
   */
  function isTextSync(filename, buffer) {
    // Test extensions
    if (filename) {
      // Split the basename on dots and walk the pieces from last to first, so the
      // right-most recognised extension decides (e.g. `archive.txt.gz` is judged by `gz`).
      var parts = pathUtil.basename(filename).split('.').reverse();
      for (var index = 0; index < parts.length; ++index) {
        var extension = parts[index];
        if (textExtensions.indexOf(extension) !== -1) {
          return true;
        }
        if (binaryExtensions.indexOf(extension) !== -1) {
          return false;
        }
      }
    }

    // Fallback to encoding if extension check was not enough
    if (buffer) {
      return getEncodingSync(buffer) === 'utf8';
    }

    // No buffer was provided
    return null;
  }
  /**
   * Callback wrapper for {@link isTextSync}.
   *
   * The callback is invoked exactly once: with the error as its only argument if
   * {@link isTextSync} throws, otherwise with `null` and the detection result.
   *
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsTextCallback} callback
   * @returns {void}
   */
  function isTextCallback(filename, buffer, callback) {
    var result;
    try {
      result = isTextSync(filename, buffer);
    } catch (err) {
      callback(err);
      // Stop here so the success path below does not invoke the callback a second time.
      return;
    }
    // Kept outside the try block so an exception thrown by the callback itself
    // propagates to the caller instead of being reported back to the callback.
    callback(null, result);
  }
  /**
   * Promise wrapper for {@link isTextSync}.
   *
   * Resolves with the detection result, or rejects with whatever
   * {@link isTextSync} threw.
   *
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @returns {Promise<TextOrBinaryResult>}
   */
  function isTextPromise(filename, buffer) {
    var outcome;
    try {
      outcome = isTextSync(filename, buffer);
    } catch (failure) {
      return Promise.reject(failure);
    }
    return Promise.resolve(outcome);
  }
  /**
   * Dispatches to {@link isTextCallback} when a callback is supplied, otherwise to {@link isTextSync}.
   *
   * In a later major release, {@link isTextSync} will become this function, so if you
   * prefer the callback interface you should use {@link isTextCallback}.
   *
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsTextCallback} [callback] If provided, void will be returned, as the result will be provided to the callback.
   * @returns {TextOrBinaryResult|void} If no callback was provided, then the result is returned.
   */
  function isText(filename, buffer, callback) {
    return callback
      ? isTextCallback(filename, buffer, callback)
      : isTextSync(filename, buffer);
  }
  /**
   * Inverse wrapper for {@link isTextSync}.
   *
   * A `null`/`undefined` result from {@link isTextSync} is passed through as
   * `null`; any other result is negated.
   *
   * In a later major release, this function will become {@link isBinary} so you should use that instead.
   *
   * @param {string} [filename]
   * @param {Buffer} [buffer]
   * @returns {TextOrBinaryResult}
   */
  function isBinarySync(filename, buffer) {
    var isTextResult = isTextSync(filename, buffer);
    return isTextResult == null ? null : !isTextResult;
  }
  /**
   * Callback wrapper for {@link isBinarySync}.
   *
   * The callback is invoked exactly once: with the error as its only argument if
   * {@link isBinarySync} throws, otherwise with `null` and the detection result.
   *
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsBinaryCallback} callback
   * @returns {void}
   */
  function isBinaryCallback(filename, buffer, callback) {
    var result;
    try {
      result = isBinarySync(filename, buffer);
    } catch (err) {
      callback(err);
      // Stop here so the success path below does not invoke the callback a second time.
      return;
    }
    // Kept outside the try block so an exception thrown by the callback itself
    // propagates to the caller instead of being reported back to the callback.
    callback(null, result);
  }
  /**
   * Promise wrapper for {@link isBinarySync}.
   *
   * Resolves with the detection result, or rejects with whatever
   * {@link isBinarySync} threw.
   *
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @returns {Promise<TextOrBinaryResult>}
   */
  function isBinaryPromise(filename, buffer) {
    var outcome;
    try {
      outcome = isBinarySync(filename, buffer);
    } catch (failure) {
      return Promise.reject(failure);
    }
    return Promise.resolve(outcome);
  }
  /**
   * Dispatches to {@link isBinaryCallback} when a callback is supplied, otherwise to {@link isBinarySync}.
   *
   * In a later major release, {@link isBinarySync} will become this function, so if you
   * prefer the callback interface you should use {@link isBinaryCallback}.
   *
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsBinaryCallback} [callback] If provided, void will be returned, as the result will be provided to the callback.
   * @returns {TextOrBinaryResult|void} If no callback was provided, then the result is returned.
   */
  function isBinary(filename, buffer, callback) {
    return callback
      ? isBinaryCallback(filename, buffer, callback)
      : isBinarySync(filename, buffer);
  }
  /**
   * Byte length of the UTF-8 sequence announced by a lead byte.
   * Returns 1 for anything that is not a multi-byte lead (ASCII, continuation or invalid bytes).
   * @param {number} leadByte
   * @returns {number}
   */
  function utf8SequenceLength(leadByte) {
    if ((leadByte & 0xe0) === 0xc0) return 2;
    if ((leadByte & 0xf0) === 0xe0) return 3;
    if ((leadByte & 0xf8) === 0xf0) return 4;
    return 1;
  }

  /**
   * Find the lead byte of a multi-byte UTF-8 sequence that starts before `boundary`
   * and announces a length reaching to or past it.
   * @param {Buffer} buffer
   * @param {number} boundary Index of the first byte after the cut.
   * @param {number} lowest Lowest index that may be inspected.
   * @returns {number} Index of the lead byte, or -1 if no sequence straddles the boundary.
   */
  function findSplitUtf8Lead(buffer, boundary, lowest) {
    // A UTF-8 sequence is at most 4 bytes, so its lead is at most 3 bytes before the cut.
    for (var back = 1; back <= 3; ++back) {
      var position = boundary - back;
      if (position < lowest) return -1;
      var byte = buffer[position];
      // Continuation bytes look like 10xxxxxx; keep walking back towards the lead.
      if ((byte & 0xc0) === 0x80) continue;
      return utf8SequenceLength(byte) > back ? position : -1;
    }
    return -1;
  }

  /**
   * Get the encoding of a buffer.
   *
   * Without `opts`, checks a chunk at the start, middle, and end of the buffer for
   * characters that are unrecognized within UTF8 encoding, stopping at the first
   * chunk that looks binary. History has shown that inspection at all three
   * locations is necessary.
   *
   * With `opts`, inspects only the requested chunk. The chunk is widened by up to
   * three bytes on either side when its edge would otherwise cut through a
   * multi-byte UTF-8 character, because a cut character decodes to U+FFFD and
   * would make valid text look binary.
   *
   * In a later major release, this function will become {@link getEncoding} so you should use that instead.
   *
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @returns {EncodingResult} `null` if no buffer was given, otherwise `'utf8'` or `'binary'`.
   */
  function getEncodingSync(buffer, opts) {
    // Check
    if (!buffer) return null;

    // Prepare
    var textEncoding = 'utf8';
    var binaryEncoding = 'binary';

    // Discover
    if (opts == null) {
      // Start
      var chunkLength = 24;
      var encoding = getEncodingSync(buffer, {
        chunkLength: chunkLength
      });
      if (encoding === textEncoding) {
        // Middle
        var chunkBegin = Math.max(0, Math.floor(buffer.length / 2) - chunkLength);
        encoding = getEncodingSync(buffer, {
          chunkLength: chunkLength,
          chunkBegin: chunkBegin
        });
        if (encoding === textEncoding) {
          // End
          chunkBegin = Math.max(0, buffer.length - chunkLength);
          encoding = getEncodingSync(buffer, {
            chunkLength: chunkLength,
            chunkBegin: chunkBegin
          });
        }
      }

      // Return
      return encoding;
    }

    // Extract, applying the defaults only when the option is undefined
    var requestedLength = opts.chunkLength === undefined ? 24 : opts.chunkLength;
    var requestedBegin = opts.chunkBegin === undefined ? 0 : opts.chunkBegin;
    var begin = requestedBegin;
    var end = Math.min(buffer.length, requestedBegin + requestedLength);

    // Align the chunk to character boundaries. Both adjustments only apply where the
    // unaligned chunk would have produced U+FFFD, so they can only turn a false
    // 'binary' into 'utf8', never the reverse.
    if (end > begin) {
      // Chunk starts on a continuation byte: pull the start back to that character's lead byte.
      if (begin > 0 && (buffer[begin] & 0xc0) === 0x80) {
        var leadBeforeBegin = findSplitUtf8Lead(buffer, begin, 0);
        if (leadBeforeBegin !== -1) begin = leadBeforeBegin;
      }
      // Chunk ends inside a character: push the end out to include the whole character.
      if (end < buffer.length) {
        var leadBeforeEnd = findSplitUtf8Lead(buffer, end, begin);
        if (leadBeforeEnd !== -1) {
          end = Math.min(buffer.length, leadBeforeEnd + utf8SequenceLength(buffer[leadBeforeEnd]));
        }
      }
    }

    var contentChunkUTF8 = buffer.toString(textEncoding, begin, end);

    // Detect encoding
    for (var i = 0; i < contentChunkUTF8.length; ++i) {
      var charCode = contentChunkUTF8.charCodeAt(i);
      if (charCode === 65533 || charCode <= 8) {
        // 8 and below are control characters (e.g. backspace, null, eof, etc.)
        // 65533 is the unknown character
        return binaryEncoding;
      }
    }

    // Return
    return textEncoding;
  }
  /**
   * Get the encoding of a buffer, delivering the result to a callback.
   * Uses {@link getEncodingSync} behind the scenes.
   *
   * The callback is invoked exactly once: with the error as its only argument if
   * {@link getEncodingSync} throws, otherwise with `null` and the encoding.
   * `opts` may be omitted, in which case the callback is passed as the second argument.
   *
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @param {GetEncodingCallback} callback
   * @returns {void}
   */
  function getEncodingCallback(buffer, opts, callback) {
    // Support the two-argument form (buffer, callback)
    if (typeof opts === 'function' && callback == null) return getEncodingCallback(buffer, null, opts);

    /** @type {EncodingResult?} */
    var result;
    try {
      result = getEncodingSync(buffer, opts);
    } catch (err) {
      callback(err);
      // Stop here so the success path below does not invoke the callback a second time.
      return;
    }
    // Kept outside the try block so an exception thrown by the callback itself
    // propagates to the caller instead of being reported back to the callback.
    callback(null, result);
  }
  /**
   * Promise wrapper for {@link getEncodingSync}.
   *
   * Resolves with the detected encoding, or rejects with whatever
   * {@link getEncodingSync} threw.
   *
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @returns {Promise<EncodingResult>}
   */
  function getEncodingPromise(buffer, opts) {
    var detected;
    try {
      detected = getEncodingSync(buffer, opts);
    } catch (failure) {
      return Promise.reject(failure);
    }
    return Promise.resolve(detected);
  }
  /**
   * Dispatches to {@link getEncodingCallback} when a callback is supplied (as the
   * third argument, or as the second in place of `opts`), otherwise to {@link getEncodingSync}.
   *
   * In a later major release, {@link getEncodingSync} will become this function, so if you
   * prefer the callback interface you should use {@link getEncodingCallback}.
   *
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @param {GetEncodingCallback} [callback] If provided, void will be returned, as the result will be provided to the callback.
   * @returns {EncodingResult|void} If no callback was provided, then the result is returned.
   */
  function getEncoding(buffer, opts, callback) {
    var wantsCallback = Boolean(callback) || typeof opts === 'function';
    return wantsCallback
      ? getEncodingCallback(buffer, opts, callback)
      : getEncodingSync(buffer, opts);
  } // Export
  // Public API, in the same key order as before: text detection, binary detection, encoding detection.
  var publicApi = {};
  publicApi.isTextSync = isTextSync;
  publicApi.isTextCallback = isTextCallback;
  publicApi.isTextPromise = isTextPromise;
  publicApi.isText = isText;
  publicApi.isBinarySync = isBinarySync;
  publicApi.isBinaryCallback = isBinaryCallback;
  publicApi.isBinaryPromise = isBinaryPromise;
  publicApi.isBinary = isBinary;
  publicApi.getEncoding = getEncoding;
  publicApi.getEncodingSync = getEncodingSync;
  publicApi.getEncodingPromise = getEncodingPromise;
  publicApi.getEncodingCallback = getEncodingCallback;
  module.exports = publicApi;