// Tokenizer.d.ts
  /**
   * All the states the tokenizer can be in.
   *
   * This is an ambient `const enum`: there is no runtime object, and the
   * compiler substitutes the numeric value at each use site. The numeric
   * values below are therefore part of the declared contract and must stay
   * in sync with the implementation this file describes.
   */
  declare const enum State {
    // Text content
    Text = 1,

    // Opening / closing tags
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes (Dq / Sq / Nq: double-quoted, single-quoted, no quotes)
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,

    // Declarations and processing instructions
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,

    // Comments
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,

    // CDATA sections
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,

    // Special tags starting with "s": script and style
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,

    // Special tags starting with "t": title
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,

    // Entities
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
  }
  /**
   * Handler contract supplied to the `Tokenizer` constructor as `cbs`.
   *
   * Each method is named after the token or event it reports. The exact
   * conditions under which each one fires are defined by the implementation,
   * which is not visible in this declaration file.
   */
  export interface Callbacks {
    onattribdata(value: string): void;
    /** `quote` may be a string, `undefined`, or `null`. */
    onattribend(quote: string | undefined | null): void;
    onattribname(name: string): void;
    oncdata(data: string): void;
    onclosetag(name: string): void;
    oncomment(data: string): void;
    ondeclaration(content: string): void;
    onend(): void;
    /** Receives the error and, optionally, a tokenizer `State`. */
    onerror(error: Error, state?: State): void;
    onopentagend(): void;
    onopentagname(name: string): void;
    onprocessinginstruction(instruction: string): void;
    onselfclosingtag(): void;
    ontext(value: string): void;
  }
  /**
   * Type declaration for the tokenizer class.
   *
   * Only the shape is declared here; private members appear without types
   * and no method has a body. Behavior notes below restate what the
   * declaration itself documents.
   */
  export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private special;
    /** Indicates whether the tokenizer has been paused. */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    private readonly cbs;
    private readonly xmlMode;
    private readonly decodeEntities;
    /**
     * @param options Optional flags `xmlMode` and `decodeEntities`; `null` is accepted in place of an options object.
     * @param cbs Handlers implementing {@link Callbacks}.
     */
    constructor(options: {
      xmlMode?: boolean;
      decodeEntities?: boolean;
    } | null, cbs: Callbacks);
    reset(): void;
    /** @param chunk A string of input to hand to the tokenizer. */
    write(chunk: string): void;
    /** @param chunk Optional final string of input. */
    end(chunk?: string): void;
    pause(): void;
    resume(): void;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private parseFixedEntity;
    private parseLegacyEntity;
    private stateInNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    private handleTrailingData;
    private getSection;
    private emitToken;
    private emitPartial;
  }
  // Empty export: marks this file as a module so `State` stays file-local.
  export {};
//# sourceMappingURL=Tokenizer.d.ts.map