index.js 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. // @ts-check
  2. /* eslint no-use-before-define:0 */
  3. 'use strict'
  4. // Import
  5. const pathUtil = require('path')
  6. const textExtensions = require('textextensions')
  7. const binaryExtensions = require('binaryextensions')
  8. /**
  9. * Will be `null` if `buffer` was not provided. Otherwise will be either `'utf8'` or `'binary'`.
  10. * @typedef {'utf8'|'binary'|null} EncodingResult
  11. */
  12. /**
  13. * Will be `null` if neither `filename` nor `buffer` were provided. Otherwise will be a boolean value with the detection result.
  14. * @typedef {boolean|null} TextOrBinaryResult
  15. */
  16. /**
  17. * @typedef {Object} EncodingOpts
  18. * @property {number} [chunkLength = 24]
  19. * @property {number} [chunkBegin = 0]
  20. */
  21. /**
  22. * @callback IsTextCallback
  23. * @param {Error?} error
  24. * @param {TextOrBinaryResult} [isTextResult]
  25. */
  26. /**
  27. * @callback IsBinaryCallback
  28. * @param {Error?} error
  29. * @param {TextOrBinaryResult} [isBinaryResult]
  30. */
  31. /**
  32. * @callback GetEncodingCallback
  33. * @param {Error?} error
  34. * @param {EncodingResult} [encoding]
  35. */
  /**
   * Decide whether a file is text, based on its filename and/or its contents.
   *
   * The filename is consulted first: its basename is split on `.` and the segments are checked from last to first
   * against the lists from https://github.com/bevry/textextensions and https://github.com/bevry/binaryextensions
   * The first segment found in either list decides the result.
   * If no segment is recognised (or no filename was given), the slower buffer encoding detection decides instead.
   * Extension checks go first because they are quicker, and because encoding checks cannot guarantee accuracy for chars between utf8 and utf16.
   * In a later major release, this function will become {@link isText} so you should use that instead.
   * @param {string} [filename] The filename for the file/buffer if available
   * @param {Buffer} [buffer] The buffer for the file if available
   * @returns {TextOrBinaryResult} `true` for text, `false` for binary, `null` when the extension was inconclusive and no buffer was given
   */
  function isTextSync(filename, buffer) {
    if (filename) {
      // Walk the dot-separated segments of the basename from right to left
      const segments = pathUtil.basename(filename).split('.')
      for (let index = segments.length - 1; index >= 0; --index) {
        const segment = segments[index]
        if (textExtensions.indexOf(segment) >= 0) return true
        if (binaryExtensions.indexOf(segment) >= 0) return false
      }
    }
    // Extensions were inconclusive: inspect the content if there is any, otherwise report "unknown"
    return buffer ? getEncodingSync(buffer) === 'utf8' : null
  }
  /**
   * Callback wrapper for {@link isTextSync}.
   * The callback is invoked exactly once: with the error if {@link isTextSync} threw, otherwise with `null` and the result.
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsTextCallback} callback
   * @returns {void}
   */
  function isTextCallback(filename, buffer, callback) {
    let result
    try {
      result = isTextSync(filename, buffer)
    } catch (err) {
      // Stop here, otherwise the success call below would fire the callback a second time
      callback(err)
      return
    }
    // Kept outside the try block so an exception thrown by the callback itself is not reported back to it as a detection error
    callback(null, result)
  }
  /**
   * Promise wrapper for {@link isTextSync}.
   * Never throws synchronously: an exception from {@link isTextSync} becomes a rejected promise.
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @returns {Promise<TextOrBinaryResult>}
   */
  function isTextPromise(filename, buffer) {
    let outcome
    try {
      outcome = isTextSync(filename, buffer)
    } catch (err) {
      return Promise.reject(err)
    }
    return Promise.resolve(outcome)
  }
  /**
   * Dual-signature entry point: synchronous via {@link isTextSync} when no callback is given, asynchronous-style via {@link isTextCallback} when one is.
   * In a later major release, {@link isTextSync} will become this function, so if you prefer the callback interface you should use {@link isTextCallback}.
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsTextCallback} [callback] If provided, void will be returned, as the result will be provided to the callback.
   * @returns {TextOrBinaryResult|void} If no callback was provided, then the result is returned.
   */
  function isText(filename, buffer, callback) {
    // No callback: answer synchronously
    if (!callback) return isTextSync(filename, buffer)
    // Callback supplied: delegate, passing the wrapper's (void) return value through
    return isTextCallback(filename, buffer, callback)
  }
  /**
   * Negation of {@link isTextSync}: `true` where it reports `false` and vice versa.
   * An inconclusive (`null`/`undefined`) text result stays `null` rather than being negated.
   * In a later major release, this function will become {@link isBinary} so you should use that instead.
   * @param {string} [filename]
   * @param {Buffer} [buffer]
   * @returns {TextOrBinaryResult}
   */
  function isBinarySync(filename, buffer) {
    const isTextResult = isTextSync(filename, buffer)
    const inconclusive = isTextResult === null || isTextResult === undefined
    return inconclusive ? null : !isTextResult
  }
  /**
   * Callback wrapper for {@link isBinarySync}.
   * The callback is invoked exactly once: with the error if {@link isBinarySync} threw, otherwise with `null` and the result.
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsBinaryCallback} callback
   * @returns {void}
   */
  function isBinaryCallback(filename, buffer, callback) {
    let result
    try {
      result = isBinarySync(filename, buffer)
    } catch (err) {
      // Stop here, otherwise the success call below would fire the callback a second time
      callback(err)
      return
    }
    // Kept outside the try block so an exception thrown by the callback itself is not reported back to it as a detection error
    callback(null, result)
  }
  /**
   * Promise wrapper for {@link isBinarySync}.
   * Never throws synchronously: an exception from {@link isBinarySync} becomes a rejected promise.
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @returns {Promise<TextOrBinaryResult>}
   */
  function isBinaryPromise(filename, buffer) {
    let outcome
    try {
      outcome = isBinarySync(filename, buffer)
    } catch (err) {
      return Promise.reject(err)
    }
    return Promise.resolve(outcome)
  }
  /**
   * Dual-signature entry point: synchronous via {@link isBinarySync} when no callback is given, asynchronous-style via {@link isBinaryCallback} when one is.
   * In a later major release, {@link isBinarySync} will become this function, so if you prefer the callback interface you should use {@link isBinaryCallback}.
   * @param {string?} filename
   * @param {Buffer?} buffer
   * @param {IsBinaryCallback} [callback] If provided, void will be returned, as the result will be provided to the callback.
   * @returns {TextOrBinaryResult|void} If no callback was provided, then the result is returned.
   */
  function isBinary(filename, buffer, callback) {
    // No callback: answer synchronously
    if (!callback) return isBinarySync(filename, buffer)
    // Callback supplied: delegate, passing the wrapper's (void) return value through
    return isBinaryCallback(filename, buffer, callback)
  }
  /**
   * Whether `byte` is a UTF-8 continuation byte (bit pattern `10xxxxxx`).
   * @param {number} byte
   * @returns {boolean}
   * @private
   */
  function isUtf8ContinuationByte(byte) {
    return (byte & 0xc0) === 0x80
  }

  /**
   * Length in bytes of the UTF-8 sequence that `byte` would start, judged from its high bits.
   * Bytes that do not start a multi-byte sequence count as 1.
   * @param {number} byte
   * @returns {number}
   * @private
   */
  function utf8SequenceLength(byte) {
    if (byte >= 0xf0 && byte <= 0xf7) return 4
    if (byte >= 0xe0 && byte <= 0xef) return 3
    if (byte >= 0xc0 && byte <= 0xdf) return 2
    return 1
  }

  /**
   * Move a chunk start that falls inside a multi-byte UTF-8 character back to that character's lead byte.
   * The start is left untouched when no lead byte covering it is found, so stray continuation bytes still decode as invalid.
   * @param {Buffer} buffer
   * @param {number} chunkBegin
   * @returns {number}
   * @private
   */
  function alignChunkBegin(buffer, chunkBegin) {
    if (chunkBegin <= 0 || !isUtf8ContinuationByte(buffer[chunkBegin])) return chunkBegin
    // A sequence is at most 4 bytes long, so its lead byte is at most 3 bytes back
    for (let back = 1; back <= 3 && chunkBegin - back >= 0; ++back) {
      const byte = buffer[chunkBegin - back]
      if (isUtf8ContinuationByte(byte)) continue
      // Only move when the sequence started here actually reaches the requested start
      return utf8SequenceLength(byte) > back ? chunkBegin - back : chunkBegin
    }
    return chunkBegin
  }

  /**
   * Extend a chunk end that falls inside a multi-byte UTF-8 character so the whole character is included.
   * The end never exceeds the buffer length, so a character truncated by the end of the buffer still decodes as invalid.
   * @param {Buffer} buffer
   * @param {number} chunkBegin
   * @param {number} chunkEnd
   * @returns {number}
   * @private
   */
  function alignChunkEnd(buffer, chunkBegin, chunkEnd) {
    // Find the last byte in the chunk that is not a continuation byte; only the final 3 bytes can start an unfinished sequence
    for (let index = chunkEnd - 1; index >= chunkBegin && index >= chunkEnd - 3; --index) {
      const byte = buffer[index]
      if (isUtf8ContinuationByte(byte)) continue
      const sequenceEnd = index + utf8SequenceLength(byte)
      return sequenceEnd > chunkEnd ? Math.min(buffer.length, sequenceEnd) : chunkEnd
    }
    return chunkEnd
  }

  /**
   * Get the encoding of a buffer.
   * Without `opts`, checks a chunk at the start, middle, and end of the buffer for characters that are unrecognized within UTF8 encoding,
   * stopping at the first chunk that looks binary. History has shown that inspection at all three locations is necessary.
   * With `opts`, only the single chunk described by `opts` is inspected.
   * Chunk edges that would cut a multi-byte UTF8 character in half are widened to the character boundary,
   * so valid non-ASCII text is not misreported as binary merely because of where the chunk happened to fall.
   * In a later major release, this function will become {@link getEncoding} so you should use that instead.
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @returns {EncodingResult} `null` if no buffer was given, otherwise `'utf8'` or `'binary'`
   */
  function getEncodingSync(buffer, opts) {
    // Check
    if (!buffer) return null

    // Prepare
    const textEncoding = 'utf8'
    const binaryEncoding = 'binary'

    // Discover
    if (opts == null) {
      // Start
      const chunkLength = 24
      let encoding = getEncodingSync(buffer, { chunkLength })
      if (encoding === textEncoding) {
        // Middle
        let chunkBegin = Math.max(0, Math.floor(buffer.length / 2) - chunkLength)
        encoding = getEncodingSync(buffer, { chunkLength, chunkBegin })
        if (encoding === textEncoding) {
          // End
          chunkBegin = Math.max(0, buffer.length - chunkLength)
          encoding = getEncodingSync(buffer, { chunkLength, chunkBegin })
        }
      }
      return encoding
    }

    // Extract the requested window
    const { chunkLength = 24, chunkBegin = 0 } = opts
    let begin = chunkBegin
    let end = Math.min(buffer.length, chunkBegin + chunkLength)
    if (end > begin) {
      // Snap both edges to character boundaries; an empty window is left alone
      begin = alignChunkBegin(buffer, begin)
      end = alignChunkEnd(buffer, begin, end)
    }
    const contentChunkUTF8 = buffer.toString(textEncoding, begin, end)

    // Detect encoding
    for (let i = 0; i < contentChunkUTF8.length; ++i) {
      const charCode = contentChunkUTF8.charCodeAt(i)
      // 65533 is the unknown (replacement) character produced for undecodable bytes
      // 8 and below are control characters (e.g. backspace, null, eof, etc.)
      if (charCode === 65533 || charCode <= 8) {
        return binaryEncoding
      }
    }
    return textEncoding
  }
  /**
   * Get the encoding of a buffer, delivering the outcome to a callback.
   * Uses {@link getEncodingSync} behind the scenes.
   * May also be called as `getEncodingCallback(buffer, callback)`, omitting `opts`.
   * The callback is invoked exactly once: with the error if {@link getEncodingSync} threw, otherwise with `null` and the result.
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @param {GetEncodingCallback} callback
   * @returns {void}
   */
  function getEncodingCallback(buffer, opts, callback) {
    // Two-argument form: the second argument is actually the callback
    if (typeof opts === 'function' && callback == null) {
      return getEncodingCallback(buffer, null, opts)
    }

    /** @type {EncodingResult?} */
    let result
    try {
      result = getEncodingSync(buffer, opts)
    } catch (err) {
      // Stop here, otherwise the success call below would fire the callback a second time
      callback(err)
      return
    }
    // Kept outside the try block so an exception thrown by the callback itself is not reported back to it as a detection error
    callback(null, result)
  }
  /**
   * Promise wrapper for {@link getEncodingSync}.
   * Never throws synchronously: an exception from {@link getEncodingSync} becomes a rejected promise.
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @returns {Promise<EncodingResult>}
   */
  function getEncodingPromise(buffer, opts) {
    let outcome
    try {
      outcome = getEncodingSync(buffer, opts)
    } catch (err) {
      return Promise.reject(err)
    }
    return Promise.resolve(outcome)
  }
  /**
   * Dual-signature entry point: synchronous via {@link getEncodingSync}, or callback-based via {@link getEncodingCallback}.
   * The callback path is taken when `callback` is given, or when a function is passed in the `opts` position.
   * In a later major release, {@link getEncodingSync} will become this function, so if you prefer the callback interface you should use {@link getEncodingCallback}.
   * @param {Buffer} buffer
   * @param {EncodingOpts} [opts]
   * @param {GetEncodingCallback} [callback] If provided, void will be returned, as the result will be provided to the callback.
   * @returns {EncodingResult|void} If no callback was provided, then the result is returned.
   */
  function getEncoding(buffer, opts, callback) {
    const wantsCallback = Boolean(callback) || typeof opts === 'function'
    return wantsCallback
      ? getEncodingCallback(buffer, opts, callback)
      : getEncodingSync(buffer, opts)
  }
  264. // Export
  265. module.exports = {
  266. isTextSync,
  267. isTextCallback,
  268. isTextPromise,
  269. isText,
  270. isBinarySync,
  271. isBinaryCallback,
  272. isBinaryPromise,
  273. isBinary,
  274. getEncoding,
  275. getEncodingSync,
  276. getEncodingPromise,
  277. getEncodingCallback
  278. }