// extract.js
"use strict"

const htmlparser = require("htmlparser2")
const TransformableString = require("./TransformableString")
  4. const NO_IGNORE = 0
  5. const IGNORE_NEXT = 1
  6. const IGNORE_UNTIL_ENABLE = 2
  7. function iterateScripts(code, options, onChunk) {
  8. if (!code) return
  9. const xmlMode = options.xmlMode
  10. const isJavaScriptMIMEType = options.isJavaScriptMIMEType || (() => true)
  11. const javaScriptTagNames = options.javaScriptTagNames || ["script"]
  12. let index = 0
  13. let inScript = false
  14. let cdata = []
  15. let ignoreState = NO_IGNORE
  16. const chunks = []
  17. function pushChunk(type, end) {
  18. chunks.push({ type, start: index, end, cdata })
  19. cdata = []
  20. index = end
  21. }
  22. const parser = new htmlparser.Parser(
  23. {
  24. onopentag(name, attrs) {
  25. // Test if current tag is a valid <script> tag.
  26. if (!javaScriptTagNames.includes(name)) {
  27. return
  28. }
  29. if (attrs.type && !isJavaScriptMIMEType(attrs.type)) {
  30. return
  31. }
  32. if (attrs.src) {
  33. return
  34. }
  35. if (ignoreState === IGNORE_NEXT) {
  36. ignoreState = NO_IGNORE
  37. return
  38. }
  39. if (ignoreState === IGNORE_UNTIL_ENABLE) {
  40. return
  41. }
  42. inScript = true
  43. pushChunk("html", parser.endIndex + 1)
  44. },
  45. oncdatastart() {
  46. cdata.push(
  47. {
  48. start: parser.startIndex,
  49. end: parser.startIndex + 9,
  50. },
  51. {
  52. start: parser.endIndex - 2,
  53. end: parser.endIndex + 1,
  54. }
  55. )
  56. },
  57. onclosetag(name) {
  58. if (!javaScriptTagNames.includes(name) || !inScript) {
  59. return
  60. }
  61. inScript = false
  62. if (parser.startIndex < chunks[chunks.length - 1].end) {
  63. // The parser didn't move its index after the previous chunk emited. It occurs on
  64. // self-closing tags (xml mode). Just ignore this script.
  65. return
  66. }
  67. pushChunk("script", parser.startIndex)
  68. },
  69. ontext() {
  70. if (!inScript) {
  71. return
  72. }
  73. pushChunk("script", parser.endIndex + 1)
  74. },
  75. oncomment(comment) {
  76. comment = comment.trim()
  77. if (comment === "eslint-disable") {
  78. ignoreState = IGNORE_UNTIL_ENABLE
  79. } else if (comment === "eslint-enable") {
  80. ignoreState = NO_IGNORE
  81. } else if (comment === "eslint-disable-next-script") {
  82. ignoreState = IGNORE_NEXT
  83. }
  84. },
  85. },
  86. {
  87. xmlMode: xmlMode === true,
  88. }
  89. )
  90. parser.parseComplete(code)
  91. pushChunk("html", parser.endIndex + 1)
  92. {
  93. const emitChunk = () => {
  94. const cdata = []
  95. for (let i = startChunkIndex; i < index; i += 1) {
  96. cdata.push.apply(cdata, chunks[i].cdata)
  97. }
  98. onChunk({
  99. type: chunks[startChunkIndex].type,
  100. start: chunks[startChunkIndex].start,
  101. end: chunks[index - 1].end,
  102. cdata,
  103. })
  104. }
  105. let startChunkIndex = 0
  106. let index
  107. for (index = 1; index < chunks.length; index += 1) {
  108. if (chunks[startChunkIndex].type === chunks[index].type) continue
  109. emitChunk()
  110. startChunkIndex = index
  111. }
  112. emitChunk()
  113. }
  114. }
  115. function computeIndent(descriptor, previousHTML, slice) {
  116. if (!descriptor) {
  117. const indentMatch = /[\n\r]+([ \t]*)/.exec(slice)
  118. return indentMatch ? indentMatch[1] : ""
  119. }
  120. if (descriptor.relative) {
  121. return previousHTML.match(/([^\n\r]*)<[^<]*$/)[1] + descriptor.spaces
  122. }
  123. return descriptor.spaces
  124. }
  125. function* dedent(indent, slice) {
  126. let hadNonEmptyLine = false
  127. const re = /(\r\n|\n|\r)([ \t]*)(.*)/g
  128. let lastIndex = 0
  129. while (true) {
  130. const match = re.exec(slice)
  131. if (!match) break
  132. const newLine = match[1]
  133. const lineIndent = match[2]
  134. const lineText = match[3]
  135. const isEmptyLine = !lineText
  136. const isFirstNonEmptyLine = !isEmptyLine && !hadNonEmptyLine
  137. const badIndentation =
  138. // Be stricter on the first line
  139. isFirstNonEmptyLine
  140. ? indent !== lineIndent
  141. : lineIndent.indexOf(indent) !== 0
  142. if (!badIndentation) {
  143. lastIndex = match.index + newLine.length + indent.length
  144. // Remove the first line if it is empty
  145. const fromIndex = match.index === 0 ? 0 : match.index + newLine.length
  146. yield {
  147. type: "dedent",
  148. from: fromIndex,
  149. to: lastIndex,
  150. }
  151. } else if (isEmptyLine) {
  152. yield {
  153. type: "empty",
  154. }
  155. } else {
  156. yield {
  157. type: "bad-indent",
  158. }
  159. }
  160. if (!isEmptyLine) {
  161. hadNonEmptyLine = true
  162. }
  163. }
  164. const endSpaces = slice.slice(lastIndex).match(/[ \t]*$/)[0].length
  165. if (endSpaces) {
  166. yield {
  167. type: "dedent",
  168. from: slice.length - endSpaces,
  169. to: slice.length,
  170. }
  171. }
  172. }
  173. function extract(
  174. code,
  175. indentDescriptor,
  176. xmlMode,
  177. javaScriptTagNames,
  178. isJavaScriptMIMEType
  179. ) {
  180. const badIndentationLines = []
  181. const codeParts = []
  182. let lineNumber = 1
  183. let previousHTML = ""
  184. iterateScripts(
  185. code,
  186. { xmlMode, javaScriptTagNames, isJavaScriptMIMEType },
  187. (chunk) => {
  188. const slice = code.slice(chunk.start, chunk.end)
  189. if (chunk.type === "html") {
  190. const match = slice.match(/\r\n|\n|\r/g)
  191. if (match) lineNumber += match.length
  192. previousHTML = slice
  193. } else if (chunk.type === "script") {
  194. const transformedCode = new TransformableString(code)
  195. let indentSlice = slice
  196. for (const cdata of chunk.cdata) {
  197. transformedCode.replace(cdata.start, cdata.end, "")
  198. if (cdata.end === chunk.end) {
  199. indentSlice = code.slice(chunk.start, cdata.start)
  200. }
  201. }
  202. transformedCode.replace(0, chunk.start, "")
  203. transformedCode.replace(chunk.end, code.length, "")
  204. for (const action of dedent(
  205. computeIndent(indentDescriptor, previousHTML, indentSlice),
  206. indentSlice
  207. )) {
  208. lineNumber += 1
  209. if (action.type === "dedent") {
  210. transformedCode.replace(
  211. chunk.start + action.from,
  212. chunk.start + action.to,
  213. ""
  214. )
  215. } else if (action.type === "bad-indent") {
  216. badIndentationLines.push(lineNumber)
  217. }
  218. }
  219. codeParts.push(transformedCode)
  220. }
  221. }
  222. )
  223. return {
  224. code: codeParts,
  225. badIndentationLines,
  226. hasBOM: code.startsWith("\uFEFF"),
  227. }
  228. }
  229. module.exports = extract