// indexOf.js (15 KB)
  1. 'use strict'
  2. const tape = require('tape')
  3. const BufferList = require('../')
  4. const { Buffer } = require('buffer')
  // Looks up one-character needles in a list built from three string chunks,
  // both from the start and from explicit start offsets.
  tape('indexOf single byte needle', (t) => {
    const list = new BufferList(['abcdefg', 'abcdefg', '12345'])

    // [arguments passed to indexOf, expected position]
    const cases = [
      [['e'], 4],
      [['e', 5], 11],
      [['e', 12], -1],
      [['5'], 18]
    ]

    cases.forEach(([args, expected]) => {
      t.equal(list.indexOf(...args), expected)
    })

    t.end()
  })
  // A two-character needle that lies entirely inside a single chunk: expected
  // at 4 from the start, and at 11 when the search starts at offset 5.
  tape('indexOf multiple byte needle', (t) => {
    const haystack = new BufferList(['abcdefg', 'abcdefg'])
    const needle = 'ef'

    const fromStart = haystack.indexOf(needle)
    t.equal(fromStart, 4)

    const fromOffsetFive = haystack.indexOf(needle, 5)
    t.equal(fromOffsetFive, 11)

    t.end()
  })
  // The needle 'fgabc' starts in the first 7-character chunk ('fg') and
  // finishes in the second ('abc'); it is expected at position 5.
  tape('indexOf multiple byte needles across buffer boundaries', (t) => {
    const chunks = ['abcdefg', 'abcdefg']
    const position = new BufferList(chunks).indexOf('fgabc')

    t.equal(position, 5)
    t.end()
  })
  // Same cross-chunk search as with the string 'fgabc', but the needle is
  // supplied as a plain Uint8Array.
  tape('indexOf takes a Uint8Array search', (t) => {
    const haystack = new BufferList(['abcdefg', 'abcdefg'])
    // byte values spelling 'fgabc'
    const needleBytes = [102, 103, 97, 98, 99]
    const needle = Uint8Array.from(needleBytes)

    const position = haystack.indexOf(needle)
    t.equal(position, 5)
    t.end()
  })
  // The needle may itself be a BufferList; 'fgabc' is expected at position 5.
  tape('indexOf takes a buffer list search', (t) => {
    const haystack = new BufferList(['abcdefg', 'abcdefg'])
    const needle = new BufferList('fgabc')

    const position = haystack.indexOf(needle)
    t.equal(position, 5)
    t.end()
  })
  // An empty needle (first as a string, then as a Buffer) is expected to be
  // reported at the start offset, capped at the length of the list.
  tape('indexOf a zero byte needle', (t) => {
    const b = new BufferList('abcdef')
    const bufEmpty = Buffer.from('')

    for (const emptyNeedle of ['', bufEmpty]) {
      t.equal(b.indexOf(emptyNeedle), 0)
      t.equal(b.indexOf(emptyNeedle, 1), 1)
      // offsets past the end are expected to yield the list length
      t.equal(b.indexOf(emptyNeedle, b.length + 1), b.length)
      t.equal(b.indexOf(emptyNeedle, Infinity), b.length)
    }

    t.end()
  })
  // The list mixes chunks longer (7, 6) and shorter (1) than the 5-character
  // needle, so a match can span more than two chunks.
  tape('indexOf buffers smaller and larger than the needle', (t) => {
    const list = new BufferList(['abcdefg', 'a', 'bcdefg', 'a', 'bcfgab'])
    const needle = 'fgabc'

    t.equal(list.indexOf(needle), 5)

    // [start offset, expected position]
    for (const [offset, expected] of [[6, 12], [13, -1]]) {
      t.equal(list.indexOf(needle, offset), expected)
    }

    t.end()
  })
  // Only registered on Node.js 6+ (major version parsed from process.version).
  if (Number(process.version.slice(1).split('.')[0]) >= 6) {
    tape('indexOf latin1 and binary encoding', (t) => {
      const b = new BufferList('abcdef')

      // The same five assertions run for each encoding name, latin1 first.
      for (const encoding of ['latin1', 'binary']) {
        // Builds a fresh single-buffer list from `text` encoded with `encoding`.
        const listFrom = (text) => new BufferList(Buffer.from(text, encoding))

        // ASCII needle (string, then Buffer) with an explicit offset of 0
        t.equal(
          listFrom(b.toString(encoding)).indexOf('d', 0, encoding),
          3
        )
        t.equal(
          listFrom(b.toString(encoding))
            .indexOf(Buffer.from('d', encoding), 0, encoding),
          3
        )

        // non-ASCII needle, with the encoding passed in the offset position
        t.equal(
          listFrom('aa\u00e8aa').indexOf('\u00e8', encoding),
          2
        )
        t.equal(
          listFrom('\u00e8').indexOf('\u00e8', encoding),
          0
        )
        t.equal(
          listFrom('\u00e8').indexOf(Buffer.from('\u00e8', encoding), encoding),
          0
        )
      }

      t.end()
    })
  }
  // Runs the assertions that, per the test title, come from the Node.js 10
  // Buffer#indexOf test suite. Assertions on `b` exercise BufferList#indexOf.
  // All assertions use tape's (actual, expected[, message]) argument order.
  // NOTE(review): many assertions below call indexOf on native Buffers
  // (Buffer.from(...)) rather than on a BufferList - presumably carried over
  // verbatim from upstream; confirm whether they should target BufferList.
  tape('indexOf the entire nodejs10 buffer test suite', (t) => {
    const b = new BufferList('abcdef')
    const bufA = Buffer.from('a')
    const bufBc = Buffer.from('bc')
    const bufF = Buffer.from('f')
    const bufZ = Buffer.from('z')
    const stringComparison = 'abcdef'

    // string needles
    t.equal(b.indexOf('a'), 0)
    t.equal(b.indexOf('a', 1), -1)
    t.equal(b.indexOf('a', -1), -1)
    t.equal(b.indexOf('a', -4), -1)
    t.equal(b.indexOf('a', -b.length), 0)
    t.equal(b.indexOf('a', NaN), 0)
    t.equal(b.indexOf('a', -Infinity), 0)
    t.equal(b.indexOf('a', Infinity), -1)
    t.equal(b.indexOf('bc'), 1)
    t.equal(b.indexOf('bc', 2), -1)
    t.equal(b.indexOf('bc', -1), -1)
    t.equal(b.indexOf('bc', -3), -1)
    t.equal(b.indexOf('bc', -5), 1)
    t.equal(b.indexOf('bc', NaN), 1)
    t.equal(b.indexOf('bc', -Infinity), 1)
    t.equal(b.indexOf('bc', Infinity), -1)
    t.equal(b.indexOf('f'), b.length - 1)
    t.equal(b.indexOf('z'), -1)

    // Buffer needles
    t.equal(b.indexOf(bufA), 0)
    t.equal(b.indexOf(bufA, 1), -1)
    t.equal(b.indexOf(bufA, -1), -1)
    t.equal(b.indexOf(bufA, -4), -1)
    t.equal(b.indexOf(bufA, -b.length), 0)
    t.equal(b.indexOf(bufA, NaN), 0)
    t.equal(b.indexOf(bufA, -Infinity), 0)
    t.equal(b.indexOf(bufA, Infinity), -1)
    t.equal(b.indexOf(bufBc), 1)
    t.equal(b.indexOf(bufBc, 2), -1)
    t.equal(b.indexOf(bufBc, -1), -1)
    t.equal(b.indexOf(bufBc, -3), -1)
    t.equal(b.indexOf(bufBc, -5), 1)
    t.equal(b.indexOf(bufBc, NaN), 1)
    t.equal(b.indexOf(bufBc, -Infinity), 1)
    t.equal(b.indexOf(bufBc, Infinity), -1)
    t.equal(b.indexOf(bufF), b.length - 1)
    t.equal(b.indexOf(bufZ), -1)

    // numeric (single byte value) needles
    t.equal(b.indexOf(0x61), 0)
    t.equal(b.indexOf(0x61, 1), -1)
    t.equal(b.indexOf(0x61, -1), -1)
    t.equal(b.indexOf(0x61, -4), -1)
    t.equal(b.indexOf(0x61, -b.length), 0)
    t.equal(b.indexOf(0x61, NaN), 0)
    t.equal(b.indexOf(0x61, -Infinity), 0)
    t.equal(b.indexOf(0x61, Infinity), -1)
    t.equal(b.indexOf(0x0), -1)

    // test offsets
    t.equal(b.indexOf('d', 2), 3)
    t.equal(b.indexOf('f', 5), 5)
    t.equal(b.indexOf('f', -1), 5)
    t.equal(b.indexOf('f', 6), -1)
    t.equal(b.indexOf(Buffer.from('d'), 2), 3)
    t.equal(b.indexOf(Buffer.from('f'), 5), 5)
    t.equal(b.indexOf(Buffer.from('f'), -1), 5)
    t.equal(b.indexOf(Buffer.from('f'), 6), -1)
    t.equal(Buffer.from('ff').indexOf(Buffer.from('f'), 1, 'ucs2'), -1)

    // test invalid and uppercase encoding
    t.equal(b.indexOf('b', 'utf8'), 1)
    t.equal(b.indexOf('b', 'UTF8'), 1)
    t.equal(b.indexOf('62', 'HEX'), 1)
    t.throws(() => b.indexOf('bad', 'enc'), TypeError)

    // test hex encoding
    t.equal(
      Buffer.from(b.toString('hex'), 'hex')
        .indexOf('64', 0, 'hex'),
      3
    )
    t.equal(
      Buffer.from(b.toString('hex'), 'hex')
        .indexOf(Buffer.from('64', 'hex'), 0, 'hex'),
      3
    )

    // test base64 encoding
    t.equal(
      Buffer.from(b.toString('base64'), 'base64')
        .indexOf('ZA==', 0, 'base64'),
      3
    )
    t.equal(
      Buffer.from(b.toString('base64'), 'base64')
        .indexOf(Buffer.from('ZA==', 'base64'), 0, 'base64'),
      3
    )

    // test ascii encoding
    t.equal(
      Buffer.from(b.toString('ascii'), 'ascii')
        .indexOf('d', 0, 'ascii'),
      3
    )
    t.equal(
      Buffer.from(b.toString('ascii'), 'ascii')
        .indexOf(Buffer.from('d', 'ascii'), 0, 'ascii'),
      3
    )

    // test optional offset with passed encoding
    t.equal(Buffer.from('aaaa0').indexOf('30', 'hex'), 4)
    t.equal(Buffer.from('aaaa00a').indexOf('3030', 'hex'), 4)

    {
      // test ucs2 encoding
      const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2')

      t.equal(twoByteString.indexOf('\u0395', 4, 'ucs2'), 8)
      t.equal(twoByteString.indexOf('\u03a3', -4, 'ucs2'), 6)
      t.equal(twoByteString.indexOf('\u03a3', -6, 'ucs2'), 4)
      t.equal(
        twoByteString.indexOf(Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2'),
        4
      )
      t.equal(twoByteString.indexOf('\u03a3', -2, 'ucs2'), -1)
    }

    const mixedByteStringUcs2 =
      Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2')

    t.equal(mixedByteStringUcs2.indexOf('bc', 0, 'ucs2'), 6)
    t.equal(mixedByteStringUcs2.indexOf('\u03a3', 0, 'ucs2'), 10)
    t.equal(mixedByteStringUcs2.indexOf('\u0396', 0, 'ucs2'), -1)

    t.equal(
      mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2'),
      6
    )
    t.equal(
      mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2'),
      10
    )
    t.equal(
      mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2'),
      -1
    )

    {
      const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2')

      // Test single char pattern
      t.equal(twoByteString.indexOf('\u039a', 0, 'ucs2'), 0)
      let index = twoByteString.indexOf('\u0391', 0, 'ucs2')
      t.equal(index, 2, `Alpha - at index ${index}`)
      index = twoByteString.indexOf('\u03a3', 0, 'ucs2')
      t.equal(index, 4, `First Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u03a3', 6, 'ucs2')
      t.equal(index, 6, `Second Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u0395', 0, 'ucs2')
      t.equal(index, 8, `Epsilon - at index ${index}`)
      index = twoByteString.indexOf('\u0392', 0, 'ucs2')
      t.equal(index, -1, `Not beta - at index ${index}`)

      // Test multi-char pattern
      index = twoByteString.indexOf('\u039a\u0391', 0, 'ucs2')
      t.equal(index, 0, `Lambda Alpha - at index ${index}`)
      index = twoByteString.indexOf('\u0391\u03a3', 0, 'ucs2')
      t.equal(index, 2, `Alpha Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u03a3\u03a3', 0, 'ucs2')
      t.equal(index, 4, `Sigma Sigma - at index ${index}`)
      index = twoByteString.indexOf('\u03a3\u0395', 0, 'ucs2')
      t.equal(index, 6, `Sigma Epsilon - at index ${index}`)
    }

    const mixedByteStringUtf8 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395')

    t.equal(mixedByteStringUtf8.indexOf('bc'), 5)
    t.equal(mixedByteStringUtf8.indexOf('bc', 5), 5)
    t.equal(mixedByteStringUtf8.indexOf('bc', -8), 5)
    t.equal(mixedByteStringUtf8.indexOf('\u03a3'), 7)
    t.equal(mixedByteStringUtf8.indexOf('\u0396'), -1)

    // Test complex string indexOf algorithms. Only trigger for long strings.
    // Long string that isn't a simple repeat of a shorter string.
    let longString = 'A'
    for (let i = 66; i < 76; i++) { // from 'B' to 'K'
      longString = longString + String.fromCharCode(i) + longString
    }

    const longBufferString = Buffer.from(longString)

    // pattern of 15 chars, repeated every 16 chars in long
    let pattern = 'ABACABADABACABA'
    for (let i = 0; i < longBufferString.length - pattern.length; i += 7) {
      const index = longBufferString.indexOf(pattern, i)
      // expected position: i rounded up to the next multiple of 16
      t.equal(index, (i + 15) & ~0xf,
        `Long ABACABA...-string at index ${i}`)
    }

    let index = longBufferString.indexOf('AJABACA')
    t.equal(index, 510, `Long AJABACA, First J - at index ${index}`)
    index = longBufferString.indexOf('AJABACA', 511)
    t.equal(index, 1534, `Long AJABACA, Second J - at index ${index}`)

    pattern = 'JABACABADABACABA'
    index = longBufferString.indexOf(pattern)
    t.equal(index, 511, `Long JABACABA..., First J - at index ${index}`)
    index = longBufferString.indexOf(pattern, 512)
    t.equal(
      index, 1535, `Long JABACABA..., Second J - at index ${index}`)

    // Search for a non-ASCII string in a pure ASCII string.
    const asciiString = Buffer.from(
      'somethingnotatallsinisterwhichalsoworks')
    // Must be written as \u2061: '\x2061' would parse as '\x20' (a space)
    // followed by '61', which is a pure ASCII needle.
    t.equal(asciiString.indexOf('\u2061'), -1)
    t.equal(asciiString.indexOf('eth', 0), 3)

    // Search in string containing many non-ASCII chars.
    const allCodePoints = []
    for (let i = 0; i < 65536; i++) {
      allCodePoints[i] = i
    }

    const allCharsString = String.fromCharCode.apply(String, allCodePoints)
    const allCharsBufferUtf8 = Buffer.from(allCharsString)
    const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2')

    // Search for string long enough to trigger complex search with ASCII pattern
    // and UC16 subject.
    t.equal(allCharsBufferUtf8.indexOf('notfound'), -1)
    t.equal(allCharsBufferUcs2.indexOf('notfound'), -1)

    // Needle is longer than haystack, but only because it's encoded as UTF-16
    t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1)

    t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0)
    t.equal(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1)

    // Haystack has odd length, but the needle is UCS2.
    t.equal(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1)

    {
      // Find substrings in Utf8.
      const lengths = [1, 3, 15] // Single char, simple and complex.
      const indices = [0x5, 0x60, 0x400, 0x680, 0x7ee, 0xFF02, 0x16610, 0x2f77b]
      for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) {
        for (let i = 0; i < indices.length; i++) {
          const index = indices[i]
          let length = lengths[lengthIndex]

          // widen the slice as the offset crosses each threshold
          if (index + length > 0x7F) {
            length = 2 * length
          }

          if (index + length > 0x7FF) {
            length = 3 * length
          }

          if (index + length > 0xFFFF) {
            length = 4 * length
          }

          const patternBufferUtf8 = allCharsBufferUtf8.slice(index, index + length)
          t.equal(allCharsBufferUtf8.indexOf(patternBufferUtf8), index)

          const patternStringUtf8 = patternBufferUtf8.toString()
          t.equal(allCharsBufferUtf8.indexOf(patternStringUtf8), index)
        }
      }
    }

    {
      // Find substrings in Ucs2.
      const lengths = [2, 4, 16] // Single char, simple and complex.
      const indices = [0x5, 0x65, 0x105, 0x205, 0x285, 0x2005, 0x2085, 0xfff0]

      for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) {
        for (let i = 0; i < indices.length; i++) {
          // each entry of `indices` is doubled to get the byte offset
          const index = indices[i] * 2
          const length = lengths[lengthIndex]

          const patternBufferUcs2 =
            allCharsBufferUcs2.slice(index, index + length)
          t.equal(
            allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2'), index)

          const patternStringUcs2 = patternBufferUcs2.toString('ucs2')
          t.equal(
            allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2'), index)
        }
      }
    }

    // Needles that are neither string, number nor byte container must throw.
    for (const val of [() => {}, {}, []]) {
      t.throws(() => b.indexOf(val), TypeError, `"${JSON.stringify(val)}" should throw`)
    }

    // Test weird offset arguments.
    // The following offsets coerce to NaN or 0, searching the whole Buffer
    t.equal(b.indexOf('b', undefined), 1)
    t.equal(b.indexOf('b', {}), 1)
    t.equal(b.indexOf('b', 0), 1)
    t.equal(b.indexOf('b', null), 1)
    t.equal(b.indexOf('b', []), 1)

    // The following offset coerces to 2, in other words +[2] === 2
    t.equal(b.indexOf('b', [2]), -1)

    // Behavior should match String.indexOf()
    t.equal(
      b.indexOf('b', undefined),
      stringComparison.indexOf('b', undefined))
    t.equal(
      b.indexOf('b', {}),
      stringComparison.indexOf('b', {}))
    t.equal(
      b.indexOf('b', 0),
      stringComparison.indexOf('b', 0))
    t.equal(
      b.indexOf('b', null),
      stringComparison.indexOf('b', null))
    t.equal(
      b.indexOf('b', []),
      stringComparison.indexOf('b', []))
    t.equal(
      b.indexOf('b', [2]),
      stringComparison.indexOf('b', [2]))

    // test truncation of Number arguments to uint8
    {
      const buf = Buffer.from('this is a test')

      t.equal(buf.indexOf(0x6973), 3)
      t.equal(buf.indexOf(0x697320), 4)
      t.equal(buf.indexOf(0x69732069), 2)
      t.equal(buf.indexOf(0x697374657374), 0)
      t.equal(buf.indexOf(0x69737374), 0)
      t.equal(buf.indexOf(0x69737465), 11)
      t.equal(buf.indexOf(-140), 0)
      t.equal(buf.indexOf(-152), 1)
      t.equal(buf.indexOf(0xff), -1)
      t.equal(buf.indexOf(0xffff), -1)
    }

    // Test that Uint8Array arguments are okay.
    {
      const needle = new Uint8Array([0x66, 0x6f, 0x6f])
      const haystack = new BufferList(Buffer.from('a foo b foo'))

      t.equal(haystack.indexOf(needle), 2)
    }

    t.end()
  })