htmlparser.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. // Copyright 2006-2008, The Google Caja project.
  2. // Modifications Copyright 2009 The Closure Library Authors.
  3. // All Rights Reserved
  4. /**
  5. * @license Portions of this code are from the google-caja project, received by
  6. * Google under the Apache license (http://code.google.com/p/google-caja/).
  7. * All other code is Copyright 2009 Google, Inc. All Rights Reserved.
  8. // Copyright (C) 2006 Google Inc.
  9. //
  10. // Licensed under the Apache License, Version 2.0 (the "License");
  11. // you may not use this file except in compliance with the License.
  12. // You may obtain a copy of the License at
  13. //
  14. // http://www.apache.org/licenses/LICENSE-2.0
  15. //
  16. // Unless required by applicable law or agreed to in writing, software
  17. // distributed under the License is distributed on an "AS IS" BASIS,
  18. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. // See the License for the specific language governing permissions and
  20. // limitations under the License.
  21. */
  22. /**
  23. * @fileoverview A Html SAX parser.
  24. *
  25. * Examples of usage of the {@code goog.string.html.HtmlParser}:
  26. * <pre>
  27. * var handler = new MyCustomHtmlVisitorHandlerThatExtendsHtmlSaxHandler();
  28. * var parser = new goog.string.html.HtmlParser();
  29. * parser.parse(handler, '<html><a href="google.com">link found!</a></html>');
  30. * </pre>
  31. *
  32. * TODO(user, msamuel): validate sanitizer regex against the HTML5 grammar at
  33. * http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
  34. * http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
  35. * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
  36. * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
  37. *
  38. * @author msamuel@google.com (Mike Samuel)
  39. * @supported IE6+, FF1.5+, Chrome 3.0+, Safari and Opera 10.
  40. */
  41. goog.provide('goog.string.html');
  42. goog.provide('goog.string.html.HtmlParser');
  43. goog.provide('goog.string.html.HtmlParser.EFlags');
  44. goog.provide('goog.string.html.HtmlParser.Elements');
  45. goog.provide('goog.string.html.HtmlParser.Entities');
  46. goog.provide('goog.string.html.HtmlSaxHandler');
  /**
   * A SAX-style HTML parser. The parser itself holds no state: all work happens
   * in {@code parse}, which walks a string of HTML and reports what it finds to
   * a {@code goog.string.html.HtmlSaxHandler}.
   *
   * @constructor
   */
  goog.string.html.HtmlParser = function() {};
  /**
   * Named HTML entities understood by the parser, keyed by the lower-case name
   * that appears between '&' and ';' and mapped to the character it stands for.
   * Numeric entities are not listed here; they are decoded separately by
   * {@code lookupEntity_}.
   * TODO(user): use {@code goog.string.htmlEncode} instead.
   * @type {!Object<string, string>}
   */
  goog.string.html.HtmlParser.Entities = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'nbsp': '\u00a0',
    'quot': '"',
    'apos': '\''
  };
  /**
   * Per-element bit flags used internally by the parser. Each value occupies
   * its own bit so that several flags can be OR-ed together in
   * {@code goog.string.html.HtmlParser.Elements}.
   *
   * Within this file only CDATA and RCDATA influence parsing: after the start
   * tag of such an element, everything up to the next '</' + tag name is handed
   * to the handler's {@code cdata} or {@code rcdata} callback respectively.
   * @enum {number}
   */
  goog.string.html.HtmlParser.EFlags = {
    OPTIONAL_ENDTAG: 0x01,
    EMPTY: 0x02,
    CDATA: 0x04,
    RCDATA: 0x08,
    UNSAFE: 0x10,
    FOLDABLE: 0x20
  };
  /**
   * The set of element names known to the parser, each mapped to the bitwise OR
   * of the {@code goog.string.html.HtmlParser.EFlags} that apply to it (0 when
   * none apply). {@code parse} only reports start and end tags for elements
   * that are own properties of this map.
   * @type {Object<string,number>}
   */
  goog.string.html.HtmlParser.Elements = (function() {
    // Short local aliases keep the table below readable.
    var flags = goog.string.html.HtmlParser.EFlags;
    var OPTIONAL_ENDTAG = flags.OPTIONAL_ENDTAG;
    var EMPTY = flags.EMPTY;
    var CDATA = flags.CDATA;
    var RCDATA = flags.RCDATA;
    var UNSAFE = flags.UNSAFE;
    var FOLDABLE = flags.FOLDABLE;

    return {
      'a': 0,
      'abbr': 0,
      'acronym': 0,
      'address': 0,
      'applet': UNSAFE,
      'area': EMPTY,
      'b': 0,
      'base': EMPTY | UNSAFE,
      'basefont': EMPTY | UNSAFE,
      'bdo': 0,
      'big': 0,
      'blockquote': 0,
      'body': OPTIONAL_ENDTAG | UNSAFE | FOLDABLE,
      'br': EMPTY,
      'button': 0,
      'canvas': 0,
      'caption': 0,
      'center': 0,
      'cite': 0,
      'code': 0,
      'col': EMPTY,
      'colgroup': OPTIONAL_ENDTAG,
      'dd': OPTIONAL_ENDTAG,
      'del': 0,
      'dfn': 0,
      'dir': 0,
      'div': 0,
      'dl': 0,
      'dt': OPTIONAL_ENDTAG,
      'em': 0,
      'fieldset': 0,
      'font': 0,
      'form': 0,
      'frame': EMPTY | UNSAFE,
      'frameset': UNSAFE,
      'h1': 0,
      'h2': 0,
      'h3': 0,
      'h4': 0,
      'h5': 0,
      'h6': 0,
      'head': OPTIONAL_ENDTAG | UNSAFE | FOLDABLE,
      'hr': EMPTY,
      'html': OPTIONAL_ENDTAG | UNSAFE | FOLDABLE,
      'i': 0,
      'iframe': UNSAFE | CDATA,
      'img': EMPTY,
      'input': EMPTY,
      'ins': 0,
      'isindex': EMPTY | UNSAFE,
      'kbd': 0,
      'label': 0,
      'legend': 0,
      'li': OPTIONAL_ENDTAG,
      'link': EMPTY | UNSAFE,
      'map': 0,
      'menu': 0,
      'meta': EMPTY | UNSAFE,
      'noframes': UNSAFE | CDATA,
      'noscript': UNSAFE | CDATA,
      'object': UNSAFE,
      'ol': 0,
      'optgroup': 0,
      'option': OPTIONAL_ENDTAG,
      'p': OPTIONAL_ENDTAG,
      'param': EMPTY | UNSAFE,
      'pre': 0,
      'q': 0,
      's': 0,
      'samp': 0,
      'script': UNSAFE | CDATA,
      'select': 0,
      'small': 0,
      'span': 0,
      'strike': 0,
      'strong': 0,
      'style': UNSAFE | CDATA,
      'sub': 0,
      'sup': 0,
      'table': 0,
      'tbody': OPTIONAL_ENDTAG,
      'td': OPTIONAL_ENDTAG,
      'textarea': RCDATA,
      'tfoot': OPTIONAL_ENDTAG,
      'th': OPTIONAL_ENDTAG,
      'thead': OPTIONAL_ENDTAG,
      'title': RCDATA | UNSAFE,
      'tr': OPTIONAL_ENDTAG,
      'tt': 0,
      'u': 0,
      'ul': 0,
      'var': 0
    };
  })();
  /**
   * Matches every '&' in a string (global flag, for use with String.replace).
   * @type {RegExp}
   * @package
   */
  goog.string.html.HtmlParser.AMP_RE = /&/g;

  /**
   * Matches every "loose" '&', i.e. one that does not begin an entity: an '&'
   * followed by something other than a letter or '#', a '&#' followed by
   * something other than a digit or 'x', a '&#x' followed by something other
   * than a hex digit, or any of those prefixes at the very end of the string.
   * Group 1 captures whatever followed the '&' so a replacement can re-emit it.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.LOOSE_AMP_RE_ =
      /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;

  /**
   * Matches every '<' in a string.
   * @type {RegExp}
   * @package
   */
  goog.string.html.HtmlParser.LT_RE = /</g;

  /**
   * Matches every '>' in a string.
   * @type {RegExp}
   * @package
   */
  goog.string.html.HtmlParser.GT_RE = />/g;

  /**
   * Matches every double quote in a string.
   * @type {RegExp}
   * @package
   */
  goog.string.html.HtmlParser.QUOTE_RE = /\"/g;

  /**
   * Matches every '=' in a string.
   * @type {RegExp}
   * @package
   */
  goog.string.html.HtmlParser.EQUALS_RE = /=/g;

  /**
   * Matches every NUL (U+0000) character in a string.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.NULL_RE_ = /\0/g;
  /**
   * Matches every complete HTML entity in a string: '&', then a decimal
   * reference ('#' + digits), a hexadecimal reference ('#x' or '#X' + hex
   * digits) or a word-character name, then ';'. Group 1 captures the part
   * between '&' and ';'.
   *
   * The hexadecimal prefix is accepted in either case because HTML allows both
   * '&#x41;' and '&#X41;'. {@code lookupEntity_} lower-cases the captured name
   * before decoding it, so both spellings decode to the same character.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.ENTITY_RE_ = /&(#\d+|#[xX][0-9A-Fa-f]+|\w+);/g;
  /**
   * Matches an entity name that is a decimal character reference, e.g. '#65'.
   * Group 1 captures the digits.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.DECIMAL_ESCAPE_RE_ = /^#(\d+)$/;

  /**
   * Matches an entity name that is a hexadecimal character reference, e.g.
   * '#x41'. Group 1 captures the hex digits. Only a lower-case 'x' is accepted.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.HEX_ESCAPE_RE_ = /^#x([0-9A-Fa-f]+)$/;
  /**
   * Matches the next token while the parser is inside a tag (after the tag name
   * and before the closing '>'). Matching is case-insensitive and anchored at
   * the start of the remaining text.
   *
   * Capture groups:
   *   1 - attribute name;
   *   2 - the '=' and value part, present only when the attribute has one. It is
   *       captured separately because interpreters are inconsistent in whether
   *       a group that matched nothing is null, undefined or the empty string;
   *   3 - the attribute value, still quoted if it was quoted in the source;
   *   4 - the end of the tag, '>' or '/>'.
   * Anything else ("cruft") is matched but not captured.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.INSIDE_TAG_TOKEN_ = new RegExp(
      [
        // Leading whitespace is skipped, never captured.
        '^\\s*(?:',
        // --- Alternative 1: an attribute. ---
        '(?:',
        // Group 1: the attribute name.
        '([a-z][a-z-]*)',
        // Group 2 (optional, closed by ')?' below): '=' followed by a value.
        '(\\s*=\\s*',
        // Group 3: the value, one of:
        '(',
        // ... a double quoted string,
        '"[^"]*"',
        // ... a single quoted string,
        '|\'[^\']*\'',
        // ... nothing at all when another attribute follows, so that in
        // <foo bar= baz=boo> the value of bar is blank rather than "baz=boo",
        '|(?=[a-z][a-z-]*\\s*=)',
        // ... or an unquoted value. It cannot be an attribute name because the
        // zero-width lookahead above has already ruled that out.
        '|[^>"\'\\s]*',
        ')',
        ')?',
        ')',
        // --- Alternative 2: end of tag, captured in group 4. ---
        '|(/?>)',
        // --- Alternative 3: cruft, matched but not captured. ---
        '|[^a-z\\s>]+)'
      ].join(''),
      'i');
  /**
   * Matches the next token while the parser is outside a tag. Matching is
   * case-insensitive and anchored at the start of the remaining text.
   *
   * Capture groups:
   *   1 - the name of an entity (the part between '&' and ';');
   *   2 - '/' when the token opens a close tag;
   *   3 - the tag name of an open or close tag;
   *   4 - a run of plain text;
   *   5 - a single stray '<', '&' or '>'.
   * Comments, doctypes and processing instructions are matched without being
   * captured.
   * @type {RegExp}
   * @private
   */
  goog.string.html.HtmlParser.OUTSIDE_TAG_TOKEN_ = new RegExp(
      [
        '^(?:',
        // Group 1: entity name.
        '&(\\#[0-9]+|\\#[x][0-9a-f]+|\\w+);',
        // Not captured: comments ...
        '|<[!]--[\\s\\S]*?-->',
        // ... doctypes ...
        '|<!\\w[^>]*>',
        // ... and processing instructions.
        '|<\\?[^>*]*>',
        // Group 2: '/' of a close tag. Group 3: tag name.
        '|<(/)?([a-z][a-z0-9]*)',
        // Group 4: text.
        '|([^<&>]+)',
        // Group 5: cruft.
        '|([<&>]))'
      ].join(''),
      'i');
  /**
   * Tokenizes {@code htmlText} and reports its structure to {@code handler},
   * SAX style.
   *
   * The handler is driven as follows:
   * - {@code startDoc} is called once before and {@code endDoc} once after
   *   all other callbacks.
   * - {@code startTag(name, attribs)} / {@code endTag(name)} are called, if
   *   the handler defines them, when a tag whose lower-cased name is listed in
   *   {@code goog.string.html.HtmlParser.Elements} is closed by '>' or '/>'.
   *   Tags with other names are consumed silently. {@code attribs} is a flat
   *   list [name1, value1, name2, value2, ...] with lower-cased names and
   *   entity-decoded values; a valueless attribute uses its name as its value.
   *   The same array is reused and emptied after every tag, so a handler that
   *   needs to keep the attributes must copy them.
   * - {@code cdata} / {@code rcdata} receive, if defined, the content that
   *   follows the start tag of an element flagged CDATA / RCDATA, up to the
   *   next '</' + tag name (or the end of input). RCDATA is passed through
   *   {@code normalizeRCData_} first.
   * - {@code pcdata} receives text and entities exactly as written, and a
   *   stray '<', '>' or '&' as '&lt;', '&gt;' or '&amp;'.
   * Comments, doctypes and processing instructions are dropped.
   *
   * A tag that is still open when the input ends is dropped without being
   * reported.
   *
   * @param {goog.string.html.HtmlSaxHandler} handler The HtmlSaxHandler that
   *     will receive the events.
   * @param {string} htmlText The html text.
   */
  goog.string.html.HtmlParser.prototype.parse = function(handler, htmlText) {
    var HtmlParser = goog.string.html.HtmlParser;
    var dataFlags = HtmlParser.EFlags.CDATA | HtmlParser.EFlags.RCDATA;

    var inTag = false;  // True iff we're currently processing a tag.
    var attribs = [];   // Accumulates attribute names and values.
    var tagName;        // The name of the tag currently being processed.
    var eflags;         // The element flags for the current tag.
    var openTag;        // True if the current tag is an open tag.

    // Lets the handler know that we are starting to parse the document.
    handler.startDoc();

    // Consumes tokens from the htmlText and stops once all tokens are
    // processed.
    while (htmlText) {
      var m = htmlText.match(
          inTag ? HtmlParser.INSIDE_TAG_TOKEN_ : HtmlParser.OUTSIDE_TAG_TOKEN_);
      if (!m) {
        // OUTSIDE_TAG_TOKEN_ matches any non-empty string, so this can only
        // happen inside a tag when nothing but whitespace is left (e.g. the
        // input ends with '<a '). The tag was never terminated: drop it instead
        // of dereferencing a null match.
        break;
      }
      // Removes the token from the string.
      htmlText = htmlText.substring(m[0].length);

      if (!inTag) {
        if (m[1]) {  // Entity: forwarded undecoded, exactly as written.
          handler.pcdata(m[0]);
        } else if (m[3]) {  // Start of an open or close tag.
          openTag = !m[2];
          inTag = true;
          tagName = goog.string.html.toLowerCase(m[3]);
          eflags = HtmlParser.Elements.hasOwnProperty(tagName) ?
              HtmlParser.Elements[tagName] : void 0;
        } else if (m[4]) {  // Text.
          handler.pcdata(m[4]);
        } else if (m[5]) {  // Cruft: a stray special character, escaped.
          switch (m[5]) {
            case '<': handler.pcdata('&lt;'); break;
            case '>': handler.pcdata('&gt;'); break;
            default: handler.pcdata('&amp;'); break;
          }
        }
        // Otherwise a comment, doctype or processing instruction: ignored.
        continue;
      }

      if (m[1]) {  // Attribute.
        // SetAttribute with uppercase names doesn't work on IE6.
        var attribName = goog.string.html.toLowerCase(m[1]);
        var decodedValue;
        if (m[2]) {
          var encodedValue = m[3];
          var firstCharCode = encodedValue.charCodeAt(0);
          if (firstCharCode === 34 || firstCharCode === 39) {
            // Strip the surrounding double (34) or single (39) quotes.
            encodedValue = encodedValue.substring(1, encodedValue.length - 1);
          }
          decodedValue = this.unescapeEntities_(this.stripNULs_(encodedValue));
        } else {
          // Use name as value for valueless attribs, so
          //   <input type=checkbox checked>
          // gets attributes ['type', 'checkbox', 'checked', 'checked']
          decodedValue = attribName;
        }
        attribs.push(attribName, decodedValue);
      } else if (m[4]) {  // End of tag: '>' or '/>'.
        if (eflags !== void 0) {  // False if not in whitelist.
          if (openTag) {
            if (handler.startTag) {
              handler.startTag(/** @type {string} */ (tagName), attribs);
            }
          } else if (handler.endTag) {
            handler.endTag(/** @type {string} */ (tagName));
          }
        }

        if (openTag && (eflags & dataFlags)) {
          // Find the end of the element's content with a case-insensitive
          // search on the text itself. Searching a lower-cased copy and reusing
          // the offset is wrong whenever lower-casing changes the length of the
          // string (e.g. U+0130). tagName only contains [a-z0-9], so it needs
          // no regex escaping.
          var dataEnd = htmlText.search(new RegExp('</' + tagName, 'i'));
          if (dataEnd < 0) {
            dataEnd = htmlText.length;
          }
          var data = htmlText.substring(0, dataEnd);
          if (eflags & HtmlParser.EFlags.CDATA) {
            if (handler.cdata) {
              handler.cdata(data);
            }
          } else if (handler.rcdata) {
            handler.rcdata(this.normalizeRCData_(data));
          }
          htmlText = htmlText.substring(dataEnd);
        }

        // Reset the per-tag state; attribs is emptied in place and reused.
        tagName = eflags = openTag = void 0;
        attribs.length = 0;
        inTag = false;
      }
      // Otherwise cruft inside the tag: ignored.
    }

    // Lets the handler know that we are done parsing the document.
    handler.endDoc();
  };
  /**
   * Decodes an HTML entity.
   *
   * The name is lower-cased first, then resolved as a named entity from
   * {@code goog.string.html.HtmlParser.Entities}, or as a decimal ('#65') or
   * hexadecimal ('#x41') character reference. Code points above U+FFFF are
   * returned as a UTF-16 surrogate pair; String.fromCharCode alone would
   * truncate them to 16 bits and yield an unrelated character.
   *
   * @param {string} name The content between the '&' and the ';'.
   * @return {string} The character the entity stands for, or the empty string
   *     if the name is not recognised or the numeric value lies beyond
   *     U+10FFFF.
   * @private
   */
  goog.string.html.HtmlParser.prototype.lookupEntity_ = function(name) {
    // TODO(goto): use {goog.string.htmlDecode} instead ?
    // TODO(goto): &pi; is different from &Pi;
    name = goog.string.html.toLowerCase(name);
    if (goog.string.html.HtmlParser.Entities.hasOwnProperty(name)) {
      return goog.string.html.HtmlParser.Entities[name];
    }

    var radix = 10;
    var m = name.match(goog.string.html.HtmlParser.DECIMAL_ESCAPE_RE_);
    if (!m) {
      radix = 16;
      m = name.match(goog.string.html.HtmlParser.HEX_ESCAPE_RE_);
    }
    if (!m) {
      return '';
    }

    // m[1] consists solely of digits valid for the radix, so this is never NaN
    // (it may be Infinity for absurdly long input, which is rejected below).
    var codePoint = parseInt(m[1], radix);
    if (codePoint <= 0xFFFF) {
      return String.fromCharCode(codePoint);
    }
    if (codePoint <= 0x10FFFF) {
      // Encode a supplementary-plane code point as a surrogate pair.
      var offset = codePoint - 0x10000;
      return String.fromCharCode(
          0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }
    // Not a Unicode code point.
    return '';
  };
  /**
   * Returns a copy of the given string with every NUL (U+0000) character
   * removed.
   * @param {string} s The string to have the null characters removed.
   * @return {string} A string without null characters.
   * @private
   */
  goog.string.html.HtmlParser.prototype.stripNULs_ = function(s) {
    var nulPattern = goog.string.html.HtmlParser.NULL_RE_;
    var withoutNuls = s.replace(nulPattern, '');
    return withoutNuls;
  };
  /**
   * Returns the plain text of a chunk of HTML CDATA which possibly contains
   * entities: every match of {@code ENTITY_RE_} is replaced by whatever
   * {@code lookupEntity_} returns for its name.
   *
   * TODO(goto): use {@code goog.string.unescapeEntities} instead ?
   * @param {string} s A chunk of HTML CDATA. It must not start or end inside
   *     an HTML entity.
   * @return {string} The text with its entities decoded.
   * @private
   */
  goog.string.html.HtmlParser.prototype.unescapeEntities_ = function(s) {
    // Captured so the replacement callback can reach the parser instance.
    var parser = this;
    var decodeMatch = function(fullEntity, name) {
      return parser.lookupEntity_(name);
    };
    return s.replace(goog.string.html.HtmlParser.ENTITY_RE_, decodeMatch);
  };
  /**
   * Escapes the characters in RCDATA that can be escaped without changing its
   * meaning: every '&' that does not start an entity becomes '&amp;', every
   * '<' becomes '&lt;' and every '>' becomes '&gt;'. Existing entities are
   * left as they are.
   * @param {string} rcdata The RCDATA string we want to normalize.
   * @return {string} A normalized version of RCDATA.
   * @private
   */
  goog.string.html.HtmlParser.prototype.normalizeRCData_ = function(rcdata) {
    var HtmlParser = goog.string.html.HtmlParser;
    // '$1' puts back the character(s) that followed the loose ampersand.
    var normalized = rcdata.replace(HtmlParser.LOOSE_AMP_RE_, '&amp;$1');
    normalized = normalized.replace(HtmlParser.LT_RE, '&lt;');
    normalized = normalized.replace(HtmlParser.GT_RE, '&gt;');
    return normalized;
  };
  /**
   * Lower-cases a string. The native String.prototype.toLowerCase is used when
   * it maps 'SCRIPT' to 'script'; otherwise only the ASCII letters A-Z are
   * converted by hand.
   *
   * TODO(goto): why isn't this in the string package ? does this solves any
   * real problem ? move it to the goog.string package if it does.
   *
   * @param {string} str The string to lower case.
   * @return {string} The str in lower case format.
   */
  goog.string.html.toLowerCase = function(str) {
    // The check below may fail on browsers in the Turkish locale.
    var nativeIsReliable = 'SCRIPT'.toLowerCase() === 'script';
    if (!nativeIsReliable) {
      return str.replace(/[A-Z]/g, function(upperCaseLetter) {
        // Setting bit 5 turns an ASCII upper-case letter into its lower-case
        // counterpart.
        return String.fromCharCode(upperCaseLetter.charCodeAt(0) | 32);
      });
    }
    return str.toLowerCase();
  };
  /**
   * The visitor interface driven by {@code goog.string.html.HtmlParser}: its
   * methods are invoked while {@code parse} walks the HTML text.
   *
   * @interface
   */
  goog.string.html.HtmlSaxHandler = function() {};

  /**
   * Called when the parser reaches the end ('>' or '/>') of a start tag for a
   * known element.
   * @param {string} name The lower-cased name of the tag that is starting.
   * @param {Array<string>} attributes The attributes of the tag as a flat list
   *     of alternating names and values: [name1, value1, name2, value2, ...].
   *     The parser reuses and empties this array after the call, so copy it if
   *     it must be kept.
   */
  goog.string.html.HtmlSaxHandler.prototype.startTag = goog.abstractMethod;

  /**
   * Called when the parser reaches the end of a closing tag for a known
   * element.
   * @param {string} name The lower-cased name of the tag that is ending.
   */
  goog.string.html.HtmlSaxHandler.prototype.endTag = goog.abstractMethod;

  /**
   * Called for PCDATA: text between tags, entities (passed exactly as written,
   * not decoded) and stray '<', '>' or '&' characters (passed as '&lt;',
   * '&gt;' or '&amp;').
   * @param {string} text The PCDATA text found.
   */
  goog.string.html.HtmlSaxHandler.prototype.pcdata = goog.abstractMethod;

  /**
   * Called with the content of an RCDATA element, after loose '&', '<' and '>'
   * characters in it have been escaped.
   * @param {string} text The RCDATA text found.
   */
  goog.string.html.HtmlSaxHandler.prototype.rcdata = goog.abstractMethod;

  /**
   * Called with the raw, unmodified content of a CDATA element.
   * @param {string} text The CDATA text found.
   */
  goog.string.html.HtmlSaxHandler.prototype.cdata = goog.abstractMethod;

  /**
   * Called once, before any other callback, when the parser starts parsing the
   * document.
   */
  goog.string.html.HtmlSaxHandler.prototype.startDoc = goog.abstractMethod;

  /**
   * Called once, after all other callbacks, when parsing is done.
   */
  goog.string.html.HtmlSaxHandler.prototype.endDoc = goog.abstractMethod;