htmlsanitizer.js 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483
  1. // Copyright 2016 The Closure Library Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS-IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /**
  15. * @fileoverview An HTML sanitizer that can satisfy a variety of security
  16. * policies.
  17. *
  18. * This package provides html sanitizing functions. It does not enforce string
  19. * to string conversion, instead returning a dom-like element when possible.
  20. *
  21. * Examples of usage of the static {@code goog.html.sanitizer.sanitize}:
  22. * <pre>
  23. * var safeHtml = goog.html.sanitizer.sanitize('<script src="xss.js" />');
  24. * goog.dom.safe.setInnerHtml(el, safeHtml);
  25. * </pre>
  26. *
  27. * @supported IE 10+, Chrome 26+, Firefox 22+, Safari 7.1+, Opera 15+
  28. */
  29. goog.provide('goog.html.sanitizer.HtmlSanitizer');
  30. goog.provide('goog.html.sanitizer.HtmlSanitizer.Builder');
  31. goog.provide('goog.html.sanitizer.HtmlSanitizerAttributePolicy');
  32. goog.provide('goog.html.sanitizer.HtmlSanitizerPolicy');
  33. goog.provide('goog.html.sanitizer.HtmlSanitizerPolicyContext');
  34. goog.provide('goog.html.sanitizer.HtmlSanitizerPolicyHints');
  35. goog.provide('goog.html.sanitizer.HtmlSanitizerUrlPolicy');
  36. goog.require('goog.array');
  37. goog.require('goog.asserts');
  38. goog.require('goog.dom');
  39. goog.require('goog.dom.NodeType');
  40. goog.require('goog.functions');
  41. goog.require('goog.html.SafeHtml');
  42. goog.require('goog.html.SafeStyle');
  43. goog.require('goog.html.SafeUrl');
  44. goog.require('goog.html.sanitizer.AttributeSanitizedWhitelist');
  45. goog.require('goog.html.sanitizer.AttributeWhitelist');
  46. goog.require('goog.html.sanitizer.CssSanitizer');
  47. goog.require('goog.html.sanitizer.TagBlacklist');
  48. goog.require('goog.html.sanitizer.TagWhitelist');
  49. goog.require('goog.html.uncheckedconversions');
  50. goog.require('goog.object');
  51. goog.require('goog.string');
  52. goog.require('goog.string.Const');
  53. goog.require('goog.userAgent');
  /**
   * Optional hints passed to a policy function, naming the tag, attribute
   * and/or CSS property the value being sanitized belongs to.
   * @typedef {{
   *     tagName: (string|undefined),
   *     attributeName: (string|undefined),
   *     cssProperty: (string|undefined)
   * }}
   */
  goog.html.sanitizer.HtmlSanitizerPolicyHints;

  /**
   * Optional context object passed to a policy function.
   * @typedef {{
   *     cssStyle: (?CSSStyleDeclaration|undefined)
   * }}
   */
  goog.html.sanitizer.HtmlSanitizerPolicyContext;

  /**
   * A policy function: takes the string value plus optional hints, context
   * and a nested policy, and returns the resulting string or null.
   * @typedef {function(string, goog.html.sanitizer.HtmlSanitizerPolicyHints=,
   *     goog.html.sanitizer.HtmlSanitizerPolicyContext=,
   *     goog.html.sanitizer.HtmlSanitizerPolicy=):?string}
   */
  goog.html.sanitizer.HtmlSanitizerPolicy;

  /**
   * A URL policy function: takes the URL string plus optional hints and
   * returns a SafeUrl or null.
   *
   * @typedef {function(string, !goog.html.sanitizer.HtmlSanitizerPolicyHints=):
   *     ?goog.html.SafeUrl}
   */
  goog.html.sanitizer.HtmlSanitizerUrlPolicy;

  /**
   * Configuration record binding a policy to a tag/attribute combination.
   * @typedef {{
   *     tagName: string,
   *     attributeName: string,
   *     policy: ?goog.html.sanitizer.HtmlSanitizerPolicy
   * }}
   */
  goog.html.sanitizer.HtmlSanitizerAttributePolicy;
  /**
   * True when the HTML sanitizer may be used in the current browser. The only
   * exclusion for now is IE 9 and below, where the sanitizer is known to be
   * insecure.
   * @const @private {boolean}
   */
  goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_ =
      !goog.userAgent.IE || document.documentMode >= 10;

  /**
   * True when the TEMPLATE tag is supported: any non-IE browser, or a
   * document that reports no documentMode.
   * @const @package
   */
  goog.html.sanitizer.HTML_SANITIZER_TEMPLATE_SUPPORTED =
      !goog.userAgent.IE || document.documentMode == null;
  /**
   * Common prefix of every attribute the sanitizer uses internally for
   * bookkeeping.
   * @const @private {string}
   */
  goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ = 'data-sanitizer-';

  /**
   * Name of the temporary attribute the sanitizer uses for bookkeeping.
   * @const @private {string}
   */
  goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_ATTR_NAME_ =
      goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ + 'elem-num';

  /**
   * Name of the attribute placed on the span tags that replace unknown tags.
   * Its value is the name the tag had before sanitization.
   * @const @private
   */
  goog.html.sanitizer.HTML_SANITIZER_SANITIZED_ATTR_NAME_ =
      goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ + 'original-tag';

  /**
   * Name of the attribute used to mark blacklisted tags so that they can be
   * filtered out of the output afterwards.
   * @const @private
   */
  goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_ =
      goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ + 'blacklisted-tag';
  /**
   * Property descriptors captured once from the DOM prototypes, so the
   * prototypes do not have to be looked up repeatedly. Left empty when the
   * sanitizer is not supported in the current browser.
   * @const @private {!Object<string, !ObjectPropertyDescriptor>}
   */
  goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_ = (function() {
    if (!goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_) {
      return {};
    }
    var elementProto = Element.prototype;
    var nodeProto = Node.prototype;
    return {
      'attributes': Object.getOwnPropertyDescriptor(elementProto, 'attributes'),
      'setAttribute':
          Object.getOwnPropertyDescriptor(elementProto, 'setAttribute'),
      'innerHTML': Object.getOwnPropertyDescriptor(elementProto, 'innerHTML'),
      'nodeName': Object.getOwnPropertyDescriptor(nodeProto, 'nodeName'),
      'parentNode': Object.getOwnPropertyDescriptor(nodeProto, 'parentNode'),
      'childNodes': Object.getOwnPropertyDescriptor(nodeProto, 'childNodes'),
      'style': Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'style')
    };
  })();
  /**
   * Creates an HTML sanitizer configured from a builder. When no builder is
   * supplied a default-configured one is created. The builder's policies are
   * installed as part of construction, so a given builder can only be used to
   * construct one sanitizer.
   * @param {!goog.html.sanitizer.HtmlSanitizer.Builder=} opt_builder
   * @throws {!Error} If the builder's policies were already installed, or if
   *     an entry of the builder's data attribute whitelist does not start with
   *     'data-' or starts with the sanitizer's reserved bookkeeping prefix.
   * @final @constructor @struct
   */
  goog.html.sanitizer.HtmlSanitizer = function(opt_builder) {
    var builder = opt_builder || new goog.html.sanitizer.HtmlSanitizer.Builder();
    builder.installPolicies_();

    /** @private {boolean} */
    this.shouldSanitizeTemplateContents_ =
        builder.shouldSanitizeTemplateContents_;

    /** @private {!Object<string, !goog.html.sanitizer.HtmlSanitizerPolicy>} */
    this.attributeHandlers_ = goog.object.clone(builder.attributeWhitelist_);

    /** @private {!Object<string, boolean>} */
    this.tagBlacklist_ = goog.object.clone(builder.tagBlacklist_);

    /** @private {!Object<string, boolean>} */
    this.tagWhitelist_ = goog.object.clone(builder.tagWhitelist_);

    /** @private {boolean} */
    this.shouldAddOriginalTagNames_ = builder.shouldAddOriginalTagNames_;

    // Register every whitelisted data-* attribute with the default
    // cleanUpAttribute_ handler. data-* attributes are inert as per the HTML5
    // specs, so not much sanitization is needed.
    goog.array.forEach(builder.dataAttributeWhitelist_, function(dataAttr) {
      // These checks are security-relevant, so they must hold even when
      // goog.asserts is disabled: throw explicitly instead of asserting.
      // Otherwise a non-data attribute could be whitelisted through
      // allowDataAttributes() in builds without assertions.
      if (!goog.string.startsWith(dataAttr, 'data-')) {
        throw new goog.asserts.AssertionError(
            'Only "data-" attributes allowed, got: %s.', [dataAttr]);
      }
      if (goog.string.startsWith(
              dataAttr,
              goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_)) {
        throw new goog.asserts.AssertionError(
            'Attributes with "%s" prefix are not allowed, got: %s.',
            [goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_, dataAttr]);
      }
      this.attributeHandlers_['* ' + dataAttr.toUpperCase()] =
          /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
              goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
    }, this);
  };
  /**
   * Adapts a HtmlSanitizerUrlPolicy into a HtmlSanitizerPolicy. The returned
   * policy first passes the URL through cleanUpAttribute_, then hands the
   * result to the URL policy and unwraps the SafeUrl it returns. A missing
   * SafeUrl, or one that unwraps to SafeUrl.INNOCUOUS_STRING, yields null.
   * @param {!goog.html.sanitizer.HtmlSanitizerUrlPolicy} customUrlPolicy
   * @return {!goog.html.sanitizer.HtmlSanitizerPolicy}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_ = function(customUrlPolicy) {
    var adaptedPolicy = function(url, policyHints) {
      var cleanedUrl =
          goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_(url, policyHints);
      var policyResult = customUrlPolicy(cleanedUrl, policyHints);
      if (!policyResult) {
        return null;
      }
      if (goog.html.SafeUrl.unwrap(policyResult) ==
          goog.html.SafeUrl.INNOCUOUS_STRING) {
        return null;
      }
      return goog.html.SafeUrl.unwrap(policyResult);
    };
    return /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
        adaptedPolicy);
  };
  /**
   * Builder for the HTML sanitizer. Every method except build() returns the
   * builder itself so that calls can be chained.
   * @final @constructor @struct
   */
  goog.html.sanitizer.HtmlSanitizer.Builder = function() {
    var defaultHandler =
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
            goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
    var builtInWhitelists = [
      goog.html.sanitizer.AttributeWhitelist,
      goog.html.sanitizer.AttributeSanitizedWhitelist
    ];

    /**
     * Attribute sanitization functions keyed by tag/attribute identifier. The
     * built-in handlers are tag-agnostic by design. Some attributes behave
     * differently depending on the node they are attached to (href generally
     * does not trigger a network request, but &lt;link href=""&gt; does), so
     * a tag-specific handler can override a tag-agnostic one when necessary.
     * @private {!Object<string, !goog.html.sanitizer.HtmlSanitizerPolicy>}
     */
    this.attributeWhitelist_ = {};
    // Seed the whitelist: every built-in attribute starts out with the
    // default clean-up handler.
    for (var i = 0; i < builtInWhitelists.length; i++) {
      var attrNames = goog.object.getKeys(builtInWhitelists[i]);
      for (var j = 0; j < attrNames.length; j++) {
        this.attributeWhitelist_[attrNames[j]] = defaultHandler;
      }
    }

    /**
     * Attribute handlers that must not be wrapped with their default policy
     * during build().
     * @private {!Object<string, boolean>}
     */
    this.attributeOverrideList_ = {};

    /**
     * Whether form tags are allowed.
     * @private {boolean}
     */
    this.allowFormTag_ = false;

    /**
     * Whether the contents of TEMPLATE tags (assuming TEMPLATE is whitelisted)
     * are sanitized rather than passed through.
     * @private {boolean}
     */
    this.shouldSanitizeTemplateContents_ = true;

    /**
     * Data attributes to whitelist. Data attributes are inert and do not
     * require sanitization.
     * @private {!Array<string>}
     */
    this.dataAttributeWhitelist_ = [];

    /**
     * Tag blacklist: a listed element is removed from the DOM together with
     * its children.
     * @private {!Object<string, boolean>}
     */
    this.tagBlacklist_ = {};

    /**
     * Tag whitelist: a listed element is allowed in the DOM together with its
     * children.
     * @private {!Object<string, boolean>}
     */
    this.tagWhitelist_ = goog.object.clone(goog.html.sanitizer.TagWhitelist);

    /**
     * Whether tags that are neither whitelisted nor blacklisted, and were
     * therefore converted to &lt;span&gt;, carry their original tag name in a
     * data attribute.
     * @private {boolean}
     */
    this.shouldAddOriginalTagNames_ = false;

    /**
     * Policy applied to URLs that do not trigger requests.
     * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
     */
    this.urlPolicy_ = goog.html.sanitizer.HtmlSanitizer.defaultUrlPolicy_;

    /**
     * Policy applied to URLs that may trigger requests.
     * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
     */
    this.networkRequestUrlPolicy_ =
        goog.html.sanitizer.HtmlSanitizer.defaultNetworkRequestUrlPolicy_;

    /**
     * Policy applied to names.
     * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
     */
    this.namePolicy_ = goog.html.sanitizer.HtmlSanitizer.defaultNamePolicy_;

    /**
     * Policy applied to other tokens (i.e. classes and IDs).
     * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
     */
    this.tokenPolicy_ = goog.html.sanitizer.HtmlSanitizer.defaultTokenPolicy_;

    /**
     * Function used to sanitize inline CSS styles; undefined until inline
     * styles are allowed.
     * @private {(undefined|function(
     *     !goog.html.sanitizer.HtmlSanitizerPolicy,
     *     string,
     *     !goog.html.sanitizer.HtmlSanitizerPolicyHints,
     *     !goog.html.sanitizer.HtmlSanitizerPolicyContext):?string)}
     */
    this.sanitizeCssPolicy_ = undefined;

    /**
     * True iff the policies have already been installed for this instance.
     * @private {boolean}
     */
    this.policiesInstalled_ = false;
  };
  /**
   * Appends the given names to the list of allowed data attributes.
   * @param {!Array<string>} dataAttributeWhitelist
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.allowDataAttributes =
      function(dataAttributeWhitelist) {
    var allowedDataAttributes = this.dataAttributeWhitelist_;
    goog.array.extend(allowedDataAttributes, dataAttributeWhitelist);
    return this;
  };
  /**
   * Allows form tags in the HTML. Unless this is called, FORM is added to the
   * tag blacklist when the policies are installed, so form tags and their
   * content are dropped.
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.allowFormTag = function() {
    var builder = this;
    builder.allowFormTag_ = true;
    return builder;
  };
  /**
   * Adds tags to the tag whitelist (package-internal utility method only).
   * Tag names are stored upper-cased.
   * @param {!Array<string>} tags The list of tags to be added to the whitelist.
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   * @package
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype
      .alsoAllowTagsPrivateDoNotAccessOrElse = function(tags) {
    var allowedTags = this.tagWhitelist_;
    goog.array.forEach(tags, function(tagName) {
      allowedTags[tagName.toUpperCase()] = true;
    });
    return this;
  };
  /**
   * Adds attributes to the attribute whitelist (package-internal utility
   * method only). A plain string is treated as that attribute on every tag
   * ('*') with no custom policy. Each added entry is also recorded in the
   * override list, so it is not wrapped with a default policy later.
   * @param {!Array<(string|!goog.html.sanitizer.HtmlSanitizerAttributePolicy)>}
   *     attrs The list of attributes to be added to the whitelist.
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   * @package
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype
      .alsoAllowAttributesPrivateDoNotAccessOrElse = function(attrs) {
    var builder = this;
    goog.array.forEach(attrs, function(attr) {
      var spec = goog.isString(attr) ?
          {tagName: '*', attributeName: attr, policy: null} :
          attr;
      var key = goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(
          spec.tagName, spec.attributeName);
      // Entries without a custom policy get the default clean-up handler.
      builder.attributeWhitelist_[key] = spec.policy ||
          /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
              goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
      builder.attributeOverrideList_[key] = true;
    });
    return builder;
  };
  /**
   * Disables sanitization of TEMPLATE tag contents so that they are passed
   * through unmodified (package-internal utility method only).
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   * @throws {!Error} If the browser does not support the TEMPLATE tag.
   * @package
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype
      .keepUnsanitizedTemplateContentsPrivateDoNotAccessOrElse = function() {
    var templateSupported =
        goog.html.sanitizer.HTML_SANITIZER_TEMPLATE_SUPPORTED;
    if (templateSupported) {
      this.shouldSanitizeTemplateContents_ = false;
      return this;
    }
    throw new Error(
        'Cannot let unsanitized template contents through on ' +
        'browsers that do not support TEMPLATE');
  };
  /**
   * Restricts the allowed tags to the provided list. Each tag must also be
   * present in goog.html.sanitizer.TagWhitelist.
   * <p>
   * SPAN tags are ALWAYS ALLOWED, as they are part of the mechanism required
   * to preserve the HTML tree structure (when removing tags that are neither
   * blacklisted nor whitelisted).
   * @param {!Array<string>} tagWhitelist
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   * @throws {Error} Thrown if an attempt is made to allow a non-whitelisted tag.
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.onlyAllowTags = function(
      tagWhitelist) {
    // The builder's whitelist is replaced before validation starts, so a
    // rejected tag leaves the builder with the tags accepted so far.
    var restricted = {'SPAN': true};
    this.tagWhitelist_ = restricted;
    goog.array.forEach(tagWhitelist, function(tag) {
      var upperTag = tag.toUpperCase();
      if (!goog.html.sanitizer.TagWhitelist[upperTag]) {
        throw new Error(
            'Only whitelisted tags can be allowed. See ' +
            'goog.html.sanitizer.TagWhitelist');
      }
      restricted[upperTag] = true;
    });
    return this;
  };
  /**
   * Restricts the allowed attributes to the provided list, optionally
   * attaching a custom policy to each. The tag/attribute combinations must be
   * a subset of the combinations that are currently allowed.
   * <p>
   * A generic handler cannot be defined for an attribute when only a
   * tag-specific one is present, and vice versa. To accept an attribute only
   * on a specific tag when only a generic handler is whitelisted, inspect the
   * goog.html.sanitizer.HtmlSanitizerPolicyHints parameter in the policy and
   * reject the attribute on unwanted tags.
   * <p>
   * The sanitizer's own policy is still called after the provided one, so a
   * misconfigured policy cannot introduce vulnerabilities. To completely
   * override an existing attribute policy or to allow new attributes, see the
   * goog.html.sanitizer.unsafe package.
   * @param {!Array<(string|!goog.html.sanitizer.HtmlSanitizerAttributePolicy)>}
   *     attrWhitelist The subset of attributes that the sanitizer will accept.
   *     Attributes can come in two forms:
   *     - string: allow all values for this attribute on all tags.
   *     - HtmlSanitizerAttributePolicy: allows specifying a policy for a
   *       particular tag. The tagName can be "*", which means all tags. If no
   *       policy is passed, the default is to allow all values.
   *     The tag and attribute names are case-insensitive.
   *     Note that the policy for id, URLs, names etc is controlled separately
   *     (using withCustom* methods).
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   * @throws {Error} Thrown if an attempt is made to allow a non-whitelisted
   *     attribute.
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.onlyAllowAttributes =
      function(attrWhitelist) {
    var previouslyAllowed = this.attributeWhitelist_;
    // The builder's whitelist is replaced before validation starts, so a
    // rejected attribute leaves the builder with the entries accepted so far.
    var restricted = {};
    this.attributeWhitelist_ = restricted;
    goog.array.forEach(attrWhitelist, function(attr) {
      var spec = attr;
      if (goog.typeOf(attr) === 'string') {
        spec = {tagName: '*', attributeName: attr.toUpperCase(), policy: null};
      }
      var key = goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(
          spec.tagName, spec.attributeName);
      if (!previouslyAllowed[key]) {
        throw new Error('Only whitelisted attributes can be allowed.');
      }
      restricted[key] = spec.policy ||
          /** @type {goog.html.sanitizer.HtmlSanitizerPolicy} */ (
              goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
    });
    return this;
  };
  /**
   * Requests that the original tag name be recorded in the 'original-tag'
   * data attribute when an unknown tag is sanitized to &lt;span&gt;, so that
   * callers can tell such elements apart from actual &lt;span&gt; tags.
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.addOriginalTagNames =
      function() {
    var builder = this;
    builder.shouldAddOriginalTagNames_ = true;
    return builder;
  };
  /**
   * Installs a custom policy for network URLs. The URL policy is adapted to a
   * regular policy through sanitizeUrl_ before being stored.
   * @param {!goog.html.sanitizer.HtmlSanitizerUrlPolicy}
   *     customNetworkReqUrlPolicy
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype
      .withCustomNetworkRequestUrlPolicy = function(customNetworkReqUrlPolicy) {
    var adaptedPolicy =
        goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_(customNetworkReqUrlPolicy);
    this.networkRequestUrlPolicy_ = adaptedPolicy;
    return this;
  };
  /**
   * Installs a custom policy for non-network URLs. The URL policy is adapted
   * to a regular policy through sanitizeUrl_ before being stored.
   * @param {!goog.html.sanitizer.HtmlSanitizerUrlPolicy} customUrlPolicy
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.withCustomUrlPolicy =
      function(customUrlPolicy) {
    var adaptedPolicy =
        goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_(customUrlPolicy);
    this.urlPolicy_ = adaptedPolicy;
    return this;
  };
  /**
   * Installs a custom name policy, replacing the current one.
   * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} customNamePolicy
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.withCustomNamePolicy =
      function(customNamePolicy) {
    var builder = this;
    builder.namePolicy_ = customNamePolicy;
    return builder;
  };
  /**
   * Installs a custom token policy, replacing the current one.
   * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} customTokenPolicy
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.withCustomTokenPolicy =
      function(customTokenPolicy) {
    var builder = this;
    builder.tokenPolicy_ = customTokenPolicy;
    return builder;
  };
  /**
   * Allows inline CSS styles by selecting sanitizeCssBlock_ as the CSS
   * sanitization function.
   * @return {!goog.html.sanitizer.HtmlSanitizer.Builder}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.allowCssStyles =
      function() {
    var builder = this;
    builder.sanitizeCssPolicy_ =
        goog.html.sanitizer.HtmlSanitizer.sanitizeCssBlock_;
    return builder;
  };
  /**
   * Chains a custom policy with the sanitizer's default policy. The returned
   * policy runs the custom policy first; a null or undefined result
   * short-circuits to null, any other result is fed to the default policy.
   * @param {?goog.html.sanitizer.HtmlSanitizerPolicy} customPolicy The custom
   *     policy for the tag/attribute combination.
   * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} defaultPolicy The
   *     sanitizer's policy that is always called after the custom policy.
   * @return {!goog.html.sanitizer.HtmlSanitizerPolicy}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.wrapPolicy_ = function(
      customPolicy, defaultPolicy) {
    var chainedPolicy = function(value, hints, ctx, policy) {
      var customResult = customPolicy(value, hints, ctx, policy);
      if (customResult == null) {
        return null;
      }
      return defaultPolicy(customResult, hints, ctx, policy);
    };
    return /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
        chainedPolicy);
  };
  /**
   * Wraps the whitelist entry for a tag/attribute combination with the
   * sanitizer's default policy. Nothing happens when the whitelist has no
   * entry for the key, or when the key is present in the override list.
   * @param {!Object<string, !goog.html.sanitizer.HtmlSanitizerPolicy>}
   *     whitelist The whitelist to modify.
   * @param {!Object<string, boolean>} overrideList The set of attributes handlers
   *     that should not be wrapped with a default policy.
   * @param {string} key The tag/attribute combination
   * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} defaultPolicy The
   *     sanitizer's policy.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.installDefaultPolicy_ = function(
      whitelist, overrideList, key, defaultPolicy) {
    var existingPolicy = whitelist[key];
    if (!existingPolicy || overrideList[key]) {
      return;
    }
    whitelist[key] = goog.html.sanitizer.HtmlSanitizer.wrapPolicy_(
        existingPolicy, defaultPolicy);
  };
  /**
   * Creates a goog.html.sanitizer.HtmlSanitizer configured by this builder.
   * @return {!goog.html.sanitizer.HtmlSanitizer}
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.build = function() {
    var sanitizer = new goog.html.sanitizer.HtmlSanitizer(this);
    return sanitizer;
  };
  /**
   * Installs the sanitization policies for the attributes: blacklists FORM
   * unless form tags were allowed, then wraps the whitelisted non-trivial
   * attributes with the matching (possibly customized) default policy.
   * May only be called once.
   * @throws {!Error} If the policies were already installed.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.Builder.prototype.installPolicies_ =
      function() {
    if (this.policiesInstalled_) {
      throw new Error('HtmlSanitizer.Builder.build() can only be used once.');
    }
    if (!this.allowFormTag_) {
      this.tagBlacklist_['FORM'] = true;
    }

    var whitelist = this.attributeWhitelist_;
    var overrides = this.attributeOverrideList_;

    /**
     * Installs one default policy on each of the given attribute keys.
     * @param {!Array<string>} keys
     * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} policy
     */
    var installOnEach = function(keys, policy) {
      goog.array.forEach(keys, function(key) {
        goog.html.sanitizer.HtmlSanitizer.installDefaultPolicy_(
            whitelist, overrides, key, policy);
      });
    };

    // Bind all the non-trivial attribute sanitizers to the appropriate,
    // potentially customizable, handling functions.
    installOnEach(
        ['* USEMAP'],
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
            goog.html.sanitizer.HtmlSanitizer.sanitizeUrlFragment_));

    installOnEach(['* ACTION', '* CITE', '* HREF'], this.urlPolicy_);

    // LONGDESC can result in a network request. See b/23381636.
    installOnEach(
        ['* LONGDESC', '* SRC', 'LINK HREF'], this.networkRequestUrlPolicy_);

    installOnEach(
        ['* FOR', '* HEADERS', '* NAME'],
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
            goog.html.sanitizer.HtmlSanitizer.sanitizeName_,
            this.namePolicy_)));

    installOnEach(
        ['A TARGET'],
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
            goog.html.sanitizer.HtmlSanitizer.allowedAttributeValues_,
            ['_blank', '_self'])));

    installOnEach(
        ['* CLASS'],
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
            goog.html.sanitizer.HtmlSanitizer.sanitizeClasses_,
            this.tokenPolicy_)));

    installOnEach(
        ['* ID'],
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
            goog.html.sanitizer.HtmlSanitizer.sanitizeId_, this.tokenPolicy_)));

    // Without a CSS sanitization function, STYLE gets the NULL policy.
    var stylePolicy = this.sanitizeCssPolicy_ ?
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
            this.sanitizeCssPolicy_, this.networkRequestUrlPolicy_)) :
        goog.functions.NULL;
    installOnEach(['* STYLE'], stylePolicy);

    this.policiesInstalled_ = true;
  };
  /**
   * The default policy for URLs. It is built once, at load time, by handing
   * goog.html.SafeUrl.sanitize to HtmlSanitizer.sanitizeUrl_.
   * NOTE(review): sanitizeUrl_ is defined outside this section; presumably it
   * adapts a SafeUrl-returning function to the policy signature - confirm.
   * @param {string} token The URL to undergo this policy.
   * @return {?string}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.defaultUrlPolicy_ =
      goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_(goog.html.SafeUrl.sanitize);


  /**
   * The default policy for URLs which cause network requests: every URL is
   * dropped (the policy is goog.functions.NULL).
   * @param {string} token The URL to undergo this policy.
   * @return {null}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.defaultNetworkRequestUrlPolicy_ =
      goog.functions.NULL;


  /**
   * The default policy for attribute names: every name is dropped (the policy
   * is goog.functions.NULL).
   * @param {string} token The name to undergo this policy.
   * @return {?string}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.defaultNamePolicy_ = goog.functions.NULL;


  /**
   * The default policy for other tokens (i.e. class names and IDs): every token
   * is dropped (the policy is goog.functions.NULL).
   * @param {string} token The token to undergo this policy.
   * @return {?string}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.defaultTokenPolicy_ = goog.functions.NULL;
  /**
   * Builds the key used to look up an attribute handler: the node name and the
   * attribute name joined by a single space and upper-cased. A missing (falsy)
   * node name is replaced by the wildcard '*', producing a key that applies to
   * all nodes.
   * @param {?string} nodeName
   * @param {string} attributeName
   * @return {string} key into attribute handlers dict
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.attrIdentifier_ = function(
      nodeName, attributeName) {
    var scope = nodeName ? nodeName : '*';
    var key = scope + ' ' + attributeName;
    return key.toUpperCase();
  };
  /**
   * Sanitizes the inline style carried by the policy context.
   * @param {goog.html.sanitizer.HtmlSanitizerPolicy} policySanitizeUrl Policy
   *     applied to every URI the CSS sanitizer asks to rewrite.
   * @param {string} attrValue Raw attribute value. It is not read here; the
   *     style object in policyContext is sanitized instead.
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Hints
   *     forwarded to policySanitizeUrl. Its cssProperty field is overwritten
   *     with the property being processed before each call.
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyContext} policyContext
   *     Context whose cssStyle field holds the style to sanitize.
   * @return {?string} The sanitized style string, or null when the context has
   *     no style or the sanitized style is empty.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.sanitizeCssBlock_ = function(
      policySanitizeUrl, attrValue, policyHints, policyContext) {
    var dirtyStyle = policyContext.cssStyle;
    if (!dirtyStyle) {
      return null;
    }

    // Adapts the URL policy to the (uri, property) rewriter signature expected
    // by the CSS sanitizer. A rejected URI (falsy policy result) becomes ''.
    var rewriteUri = function(uri, prop) {
      policyHints.cssProperty = prop;
      var cleanUri = policySanitizeUrl(uri, policyHints) || '';
      return goog.html.uncheckedconversions
          .safeUrlFromStringKnownToSatisfyTypeContract(
              goog.string.Const.from(
                  'HtmlSanitizerPolicy created with networkRequestUrlPolicy_ ' +
                  'when installing \'* STYLE\' handler.'),
              cleanUri);
    };

    var safeStyle = goog.html.sanitizer.CssSanitizer.sanitizeInlineStyle(
        dirtyStyle, rewriteUri);
    var sanitizedStyle = goog.html.SafeStyle.unwrap(safeStyle);
    if (sanitizedStyle == '') {
      return null;
    }
    return sanitizedStyle;
  };
  /**
   * Minimal clean-up for attribute values that need no real sanitization:
   * surrounding whitespace is trimmed and nothing else is changed.
   * @param {string} attrValue
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Unused.
   * @return {string} sanitizedAttrValue
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_ = function(
      attrValue, policyHints) {
    var cleaned = goog.string.trim(attrValue);
    return cleaned;
  };
  /**
   * Accepts an attribute value only if it belongs to a fixed set. The value is
   * trimmed and compared case-insensitively; on a match the trimmed value is
   * returned with its original casing.
   * @param {!Array<string>} allowedValues Set of allowed values lowercased.
   * @param {string} attrValue
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Unused.
   * @return {?string} sanitizedAttrValue, or null if the value is not allowed.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.allowedAttributeValues_ = function(
      allowedValues, attrValue, policyHints) {
    var candidate = goog.string.trim(attrValue);
    if (goog.array.contains(allowedValues, candidate.toLowerCase())) {
      return candidate;
    }
    return null;
  };
  /**
   * Sanitizes URL fragments: after trimming, the value is kept only if it is
   * non-empty and starts with '#'.
   * @param {string} urlFragment
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Unused.
   * @return {?string} sanitizedAttrValue, or null if it is not a fragment.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.sanitizeUrlFragment_ = function(
      urlFragment, policyHints) {
    var fragment = goog.string.trim(urlFragment);
    var isFragment = fragment && fragment.charAt(0) == '#';
    // We do not apply the name or token policy to Url Fragments by design.
    return isFragment ? fragment : null;
  };
  /**
   * Trims a name-like attribute value and hands it to the given name policy.
   * @param {goog.html.sanitizer.HtmlSanitizerPolicy} namePolicy
   * @param {string} attrName The value to sanitize.
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
   * @return {?string} sanitizedAttrValue, as returned by the policy.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.sanitizeName_ = function(
      namePolicy, attrName, policyHints) {
    /* TODO(user): fail on names which contain illegal characters.
     * NOTE(jasvir):
     * There are two cases to be concerned about - escaped quotes in attribute
     * values which is the responsibility of the serializer and illegal
     * characters. The latter does violate the spec but I do not believe it has
     * a security consequence.
     */
    return namePolicy(goog.string.trim(attrName), policyHints);
  };
  /**
   * Runs every whitespace-separated token of a class attribute through the
   * token policy (e.g. to ensure that a class prefix is present on all class
   * names) and joins the accepted tokens with single spaces.
   * @param {goog.html.sanitizer.HtmlSanitizerPolicy} tokenPolicy
   * @param {string} attrValue
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
   * @return {?string} sanitizedAttrValue, or null when no token survives.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.sanitizeClasses_ = function(
      tokenPolicy, attrValue, policyHints) {
    // TODO(user): use a browser-supplied class list instead of a string.
    var classes = attrValue.split(/(?:\s+)/);
    var sanitizedClasses = [];
    for (var i = 0; i < classes.length; i++) {
      var className = classes[i];
      // Leading or trailing whitespace (or an all-whitespace value) makes
      // split() produce empty strings. They are not class names, so they must
      // not reach the policy: a prefixing policy would otherwise turn them into
      // bogus prefix-only classes.
      if (!className) {
        continue;
      }
      // TODO(user): skip classes which contain illegal characters.
      var sanitizedClass = tokenPolicy(className, policyHints);
      if (sanitizedClass) {
        sanitizedClasses.push(sanitizedClass);
      }
    }
    return sanitizedClasses.length == 0 ? null : sanitizedClasses.join(' ');
  };
  /**
   * Trims an id attribute value and hands it to the token policy (e.g. to
   * ensure that the id prefix is present).
   * @param {goog.html.sanitizer.HtmlSanitizerPolicy} tokenPolicy
   * @param {string} attrValue
   * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
   * @return {?string} sanitizedAttrValue, as returned by the policy.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.sanitizeId_ = function(
      tokenPolicy, attrValue, policyHints) {
    // TODO(user): fail on IDs which contain illegal characters.
    var id = goog.string.trim(attrValue);
    return tokenPolicy(id, policyHints);
  };
  /**
   * Parses a string of unsanitized HTML and provides an iterator over the
   * resulting DOM tree nodes. This DOM parsing must be wholly inert (that is,
   * it does not cause execution of any active content or cause the browser to
   * issue any requests). The returned iterator is guaranteed to iterate over a
   * parent element before iterating over any of its children. Only element and
   * text nodes are shown by the walker.
   * @param {string} unsanitizedHtml
   * @return {!TreeWalker} Dom tree iterator
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getDomTreeWalker_ = function(
      unsanitizedHtml) {
    var root;
    var template = document.createElement('template');
    if (!('content' in template)) {
      // In browsers where <template> is not implemented, use an HTMLDocument.
      var inertDoc = document.implementation.createHTMLDocument('x');
      root = inertDoc.body;
      inertDoc.body.innerHTML = unsanitizedHtml;
    } else {
      // Use a <template> element if possible.
      template.innerHTML = unsanitizedHtml;
      root = template.content;
    }
    var whatToShow = NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT;
    return document.createTreeWalker(root, whatToShow, null, false);
  };
  835. // TODO(pelizzi): both getAttribute* functions accept a Node but are defined on
  836. // Element. Investigate.
  /**
   * Returns an element's attributes without falling prey to things like
   * &lt;form&gt;&lt;input name="attributes"&gt;
   * &lt;input name="attributes"&gt;&lt;/form&gt;.
   * The getter stored under 'attributes' in HTML_SANITIZER_PROPERTY_DESCRIPTORS_
   * is used when available; otherwise node.attributes is returned only if it
   * really is a NamedNodeMap.
   * @param {!Node} node
   * @return {?NamedNodeMap}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getAttributes_ = function(node) {
    var descriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['attributes'];
    if (descriptor && descriptor.get) {
      return descriptor.get.call(node);
    }
    if (node.attributes instanceof NamedNodeMap) {
      return node.attributes;
    }
    return null;
  };
  /**
   * Returns a specific attribute from an element without falling prey to
   * clobbering, by calling Element.prototype.getAttribute directly.
   * @param {!Node} node
   * @param {string} attrName
   * @return {string} The attribute value, or '' when the node is not an
   *     Element, the prototype method is missing, or the value is falsy.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getAttribute_ = function(node, attrName) {
    var nativeGetAttribute = Element.prototype.getAttribute;
    if (!nativeGetAttribute || !(node instanceof Element)) {
      return '';
    }
    var value =
        nativeGetAttribute.call(/** @type {!Element} */ (node), attrName);
    // FireFox returns null
    return value || '';
  };
  /**
   * Sets an element's attributes without falling prey to things like
   * &lt;form&gt;&lt;input name="attributes"&gt;
   * &lt;input name="attributes"&gt;&lt;/form&gt;.
   * The function stored under 'setAttribute' in
   * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used; when it is unavailable this is
   * a no-op.
   * @param {!Node} node
   * @param {string} name
   * @param {string} value
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.setAttribute_ = function(node, name, value) {
    var descriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['setAttribute'];
    if (!descriptor || !descriptor.value) {
      return;
    }
    try {
      descriptor.value.call(node, name, value);
    } catch (e) {
      // IE throws an exception if the src attribute contains HTTP credentials.
      // However the attribute gets set anyway. Anything else is re-thrown.
      var isIeCredentialsError =
          e.message.indexOf('A security problem occurred') != -1;
      if (!isIeCredentialsError) {
        throw e;
      }
    }
  };
  /**
   * Returns a node's innerHTML property value without falling prey to
   * clobbering. The getter stored under 'innerHTML' in
   * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when available; otherwise
   * node.innerHTML is returned only if it is a string.
   * @param {!Node} node
   * @return {string} The inner HTML, or '' in the fallback path when
   *     node.innerHTML is not a string.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getInnerHTML_ = function(node) {
    var innerHtmlDescriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['innerHTML'];
    if (innerHtmlDescriptor && innerHtmlDescriptor.get) {
      return innerHtmlDescriptor.get.call(node);
    }
    if (typeof node.innerHTML == 'string') {
      return node.innerHTML;
    }
    return '';
  };
  /**
   * Returns an element's style without falling prey to things like
   * &lt;form&gt;&lt;input name="style"&gt;
   * &lt;input name="style"&gt;&lt;/form&gt;.
   * For HTMLElements the getter stored under 'style' in
   * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when available; otherwise
   * node.style is returned only if it really is a CSSStyleDeclaration.
   * @param {!Node} node
   * @return {?CSSStyleDeclaration}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getStyle_ = function(node) {
    var descriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['style'];
    if (node instanceof HTMLElement && descriptor && descriptor.get) {
      return descriptor.get.call(node);
    }
    if (node.style instanceof CSSStyleDeclaration) {
      return node.style;
    }
    return null;
  };
  /**
   * Returns a node's nodeName without falling prey to things like
   * &lt;form&gt;&lt;input name="nodeName"&gt;&lt;/form&gt;.
   * The getter stored under 'nodeName' in HTML_SANITIZER_PROPERTY_DESCRIPTORS_
   * is used when available; otherwise node.nodeName is returned only if it is a
   * string.
   * @param {!Node} node
   * @return {string} The node name, or 'unknown' in the fallback path when
   *     node.nodeName is not a string.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getNodeName_ = function(node) {
    var descriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['nodeName'];
    if (descriptor && descriptor.get) {
      return descriptor.get.call(node);
    }
    if (typeof node.nodeName == 'string') {
      return node.nodeName;
    }
    return 'unknown';
  };
  /**
   * Returns a node's parentNode without falling prey to things like
   * &lt;form&gt;&lt;input name="parentNode"&gt;&lt;/form&gt;.
   * @param {?Node} node
   * @return {?Node} The parent, or null when node is null/undefined or, in the
   *     fallback path, when the apparent parent looks like a clobbering child.
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getParentNode_ = function(node) {
    if (node == null) {
      return null;
    }
    var descriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['parentNode'];
    if (descriptor && descriptor.get) {
      return descriptor.get.call(node);
    }
    // We need to ensure that parentNode is returning the actual parent node
    // and not a child node that happens to have a name of "parentNode".
    // We check that the node returned by parentNode is itself not named
    // "parentNode" - this could happen legitimately but on IE we have no better
    // means of avoiding the pitfall.
    var candidate = node.parentNode;
    var looksClobbered = candidate && candidate.name &&
        typeof candidate.name == 'string' &&
        candidate.name.toLowerCase() == 'parentnode';
    return looksClobbered ? null : candidate;
  };
  /**
   * Returns the value of node.childNodes without falling prey to clobbering.
   * For elements the getter stored under 'childNodes' in
   * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when available; otherwise
   * node.childNodes is returned only if it really is a NodeList.
   * @param {!Node} node
   * @return {?NodeList}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getChildNodes_ = function(node) {
    var childNodesDescriptor =
        goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['childNodes'];
    if (goog.dom.isElement(node) && childNodesDescriptor &&
        childNodesDescriptor.get) {
      return childNodesDescriptor.get.call(node);
    }
    if (node.childNodes instanceof NodeList) {
      return node.childNodes;
    }
    return null;
  };
  /**
   * Sanitizes an HTML string and returns the result as SafeHtml. The input is
   * sanitized into a span via sanitizeToDomNode, the span is serialized with
   * XMLSerializer, and the wrapping span tag is then stripped from the string.
   * @param {?string} unsanitizedHtml
   * @return {!goog.html.SafeHtml} Sanitized HTML
   * @final
   */
  goog.html.sanitizer.HtmlSanitizer.prototype.sanitize = function(
      unsanitizedHtml) {
    var wrapperSpan = this.sanitizeToDomNode(unsanitizedHtml);
    var serialized = new XMLSerializer().serializeToString(wrapperSpan);

    // Remove the outer span added in sanitizeToDomNode. We could create an
    // element from it and then pull out the innerHTML, but this is more
    // performant.
    if (goog.string.startsWith(serialized, '<span')) {
      var closingTag = '</span>';
      if (goog.string.endsWith(serialized, closingTag)) {
        // Keep what lies between the end of the opening tag and the closing
        // tag.
        var contentStart = serialized.indexOf('>') + 1;
        serialized = serialized.slice(contentStart, -1 * closingTag.length);
      } else if (goog.string.endsWith(serialized, '/>')) {
        // Self-closed span: there is no content at all.
        serialized = '';
      }
    }

    return goog.html.uncheckedconversions
        .safeHtmlFromStringKnownToSatisfyTypeContract(
            goog.string.Const.from('Output of HTML sanitizer'), serialized);
  };
  /**
   * Parses the DOM tree of a given HTML string, then walks the tree. For each
   * node, it creates a new sanitized version, applies sanitized attributes,
   * and attaches it under the sanitized copy of its parent. The result is a
   * span element containing the sanitized content. An empty span is returned
   * when the sanitizer is unsupported, the input is empty, or the inert parse
   * throws.
   * @param {?string} unsanitizedHtml
   * @return {!HTMLSpanElement} Sanitized HTML
   * @final
   */
  goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeToDomNode = function(
      unsanitizedHtml) {
    var sanitizedParent =
        /** @type {!HTMLSpanElement} */ (document.createElement('span'));

    if (!goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_ || !unsanitizedHtml) {
      // TODO(danesh): IE9 or earlier versions don't provide an easy way to
      // parse HTML inertly. Handle in a way other than an empty span perhaps.
      return sanitizedParent;
    }

    // Get the treeWalker initialized.
    var treeWalker;
    try {
      treeWalker =
          goog.html.sanitizer.HtmlSanitizer.getDomTreeWalker_(unsanitizedHtml);
    } catch (e) {
      return sanitizedParent;
    }

    // Maps the bookkeeping id stamped on each dirty element to its clean
    // counterpart, so that the clean parent of a node can be found later.
    var elementMap = {};
    // Used in order to give a unique identifier to each node for lookups.
    var elemNum = 0;

    for (var dirtyNode = treeWalker.nextNode(); dirtyNode;
         dirtyNode = treeWalker.nextNode()) {
      elemNum++;

      // Get a clean (sanitized) version of the dirty node.
      var cleanNode = this.sanitizeElement_(dirtyNode);
      if (cleanNode.nodeType != goog.dom.NodeType.TEXT) {
        this.sanitizeAttrs_(dirtyNode, cleanNode);
        elementMap[elemNum] = cleanNode;
        goog.html.sanitizer.HtmlSanitizer.setAttribute_(
            dirtyNode, goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_ATTR_NAME_,
            String(elemNum));
      }

      // TODO(pelizzi): [IMPROVEMENT] type-checking against clobbering (e.g.
      // ClobberedNode wrapper). Closure can unwrap these at compile time, see
      // ClosureOptimizePrimitives.java, jakubvrana has created one for
      // goog.dom.Tag. Alternatively, create two actual wrappers that expose
      // clobber-safe functions, getters and setters for Node and Element.

      // TODO(pelizzi): [IMPROVEMENT] consider switching from elementMap[elemNum]
      // to a WeakMap for browsers that support it (e.g. use a ElementWeakMap that
      // falls back to using data attributes).
      // @type {ElementWeakMap<ClobberedNode, Node>}

      // TODO(pelizzi): [IMPROVEMENT] add an API to sanitize *from* DOM nodes so
      // that we don't have to use innerHTML on template recursion but instead we
      // can use importNode. The API could also be public as it is still a way to
      // make a document fragment conform to a policy, somewhat useful.

      // Template tag contents require special handling as they are not traversed
      // by the treewalker.
      var dirtyNodeName =
          goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyNode);
      if (goog.html.sanitizer.HTML_SANITIZER_TEMPLATE_SUPPORTED &&
          dirtyNodeName.toLowerCase() === 'template' &&
          !cleanNode.hasAttribute(
              goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_)) {
        this.processTemplateContents_(dirtyNode, cleanNode);
      }

      // Finds the parent to which cleanNode should be appended. A dirty node
      // that sits directly under the inert parsing root maps to the output
      // span itself.
      var dirtyParent =
          goog.html.sanitizer.HtmlSanitizer.getParentNode_(dirtyNode);
      var attachToRoot = goog.isNull(dirtyParent);
      if (!attachToRoot) {
        var parentIsBody =
            goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyParent)
                .toLowerCase() == 'body';
        var parentIsFragment =
            dirtyParent.nodeType == goog.dom.NodeType.DOCUMENT_FRAGMENT;
        if (parentIsBody || parentIsFragment) {
          var dirtyGrandParent =
              goog.html.sanitizer.HtmlSanitizer.getParentNode_(dirtyParent);
          if (parentIsFragment && goog.isNull(dirtyGrandParent)) {
            // Immediate child of the inert parent template element.
            attachToRoot = true;
          } else if (parentIsBody) {
            // Immediate child of the inert parent HtmlDocument: the body's
            // grandparent must have no parent of its own.
            var dirtyGreatGrandParent =
                goog.html.sanitizer.HtmlSanitizer.getParentNode_(
                    dirtyGrandParent);
            if (goog.isNull(goog.html.sanitizer.HtmlSanitizer.getParentNode_(
                    dirtyGreatGrandParent))) {
              attachToRoot = true;
            }
          }
        }
      }

      var target;
      if (attachToRoot || !dirtyParent) {
        target = sanitizedParent;
      } else {
        var parentId = goog.html.sanitizer.HtmlSanitizer.getAttribute_(
            dirtyParent,
            goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_ATTR_NAME_);
        target = elementMap[parentId];
      }
      // Children of a template element live in its content fragment.
      if (target.content) {
        target = target.content;
      }

      // Do not attach blacklisted tags that have been sanitized into templates.
      var isBlacklistedPlaceholder = goog.dom.isElement(cleanNode) &&
          cleanNode.hasAttribute(
              goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_);
      if (!isBlacklistedPlaceholder) {
        target.appendChild(cleanNode);
      }
    }
    return sanitizedParent;
  };
  /**
   * Returns a sanitized version of a node, with no children or user-provided
   * attributes. Text nodes are copied. Elements are recreated as:
   * a 'template' marked with the blacklisted-tag attribute if the tag is in
   * either blacklist, the same tag if it is whitelisted, or a 'span' otherwise.
   * @param {!Node} dirtyNode
   * @return {!Node}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeElement_ = function(
      dirtyNode) {
    // Text nodes don't need to be sanitized.
    if (dirtyNode.nodeType == goog.dom.NodeType.TEXT) {
      return document.createTextNode(dirtyNode.data);
    }

    // Non text nodes get an empty node based on black/white lists.
    var elemName =
        goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyNode).toUpperCase();

    // If it's in the inert blacklist, replace with template (and then add a
    // special data attribute to distinguish it from real template tags).
    // Note that this node will not be added to the final output, i.e. the
    // template tag is only an internal representation, and eventually will be
    // deleted.
    var isBlacklisted = elemName in goog.html.sanitizer.TagBlacklist ||
        elemName in this.tagBlacklist_;
    // If it's in the whitelist, keep as is.
    var isWhitelisted = !isBlacklisted && this.tagWhitelist_[elemName];
    // If it's not in any list, replace with span. If the relevant builder
    // option is enabled, they will bear the original tag name in a data
    // attribute.
    var isReplacedBySpan = !isBlacklisted && !isWhitelisted;

    var cleanElemName;
    if (isBlacklisted) {
      cleanElemName = 'template';
    } else if (isWhitelisted) {
      cleanElemName = elemName;
    } else {
      cleanElemName = 'span';
    }

    var cleanElem = document.createElement(cleanElemName);
    if (this.shouldAddOriginalTagNames_ && isReplacedBySpan) {
      goog.html.sanitizer.HtmlSanitizer.setAttribute_(
          cleanElem, goog.html.sanitizer.HTML_SANITIZER_SANITIZED_ATTR_NAME_,
          elemName.toLowerCase());
    }
    if (isBlacklisted) {
      goog.html.sanitizer.HtmlSanitizer.setAttribute_(
          cleanElem, goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_, '');
    }
    return cleanElem;
  };
  /**
   * Applies sanitized versions of attributes from a dirtyNode to a corresponding
   * cleanNode. Attributes whose sanitized value is null are not copied.
   * @param {!Node} dirtyNode
   * @param {!Node} cleanNode
   * @return {!Node} cleanNode with sanitized attributes
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeAttrs_ = function(
      dirtyNode, cleanNode) {
    var dirtyAttributes =
        goog.html.sanitizer.HtmlSanitizer.getAttributes_(dirtyNode);
    if (dirtyAttributes == null) {
      return cleanNode;
    }

    var index = 0;
    var attribute;
    // Stops at the first falsy entry, i.e. past the end of the map.
    while (attribute = dirtyAttributes[index++]) {
      if (!attribute.specified) {
        continue;
      }
      var cleanValue = this.sanitizeAttribute_(dirtyNode, attribute);
      if (cleanValue !== null) {
        goog.html.sanitizer.HtmlSanitizer.setAttribute_(
            cleanNode, attribute.name, cleanValue);
      }
    }
    return cleanNode;
  };
  /**
   * Sanitizes an attribute value by looking up an attribute handler for the given
   * node and attribute names. A handler registered for the specific tag wins
   * over the wildcard handler; with no handler at all the attribute is dropped.
   * Attributes carrying the sanitizer's bookkeeping prefix are always dropped.
   * @param {!Node} dirtyNode
   * @param {!Attr} attribute
   * @return {?string} sanitizedAttrValue
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeAttribute_ = function(
      dirtyNode, attribute) {
    var attributeName = attribute.name;
    var isBookkeeping = goog.string.startsWith(
        attributeName, goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_);
    if (isBookkeeping) {
      return null;
    }

    var nodeName = goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyNode);
    var unsanitizedAttrValue = attribute.value;

    // Create policy hints object
    var policyHints = {
      tagName: goog.string.trim(nodeName).toLowerCase(),
      attributeName: goog.string.trim(attributeName).toLowerCase()
    };
    var policyContext = goog.html.sanitizer.HtmlSanitizer.getContext_(
        policyHints.attributeName, dirtyNode);

    // Candidate handler keys in order of preference: the handler for this
    // specific tag first, then the handler for the wildcard tag.
    var handlerKeys = [
      goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(nodeName, attributeName),
      goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(null, attributeName)
    ];
    for (var i = 0; i < handlerKeys.length; i++) {
      if (handlerKeys[i] in this.attributeHandlers_) {
        // Pull the handler into a local so that it is invoked as a plain
        // function rather than as a method of the handlers dictionary.
        var handler = this.attributeHandlers_[handlerKeys[i]];
        return handler(unsanitizedAttrValue, policyHints, policyContext);
      }
    }
    return null;
  };
  /**
   * Processes the contents of a template tag. These are not traversed through the
   * treewalker because they belong to a separate document, and thus require
   * special handling.
   * <p>
   * If the relevant builder option is enabled and the template tag is allowed,
   * this method copies the contents over to the output DOM tree without
   * sanitization, otherwise the template contents are sanitized recursively.
   * <p>
   * In both cases the child list is copied into an array before the children
   * are moved. childNodes is a live list, so appending a child elsewhere removes
   * it from the list and iterating the list directly would skip every other
   * node.
   * @param {!Node} dirtyNode
   * @param {!Node} cleanNode
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.prototype.processTemplateContents_ = function(
      dirtyNode, cleanNode) {
    var children;
    // If the template element was sanitized into a span tag, do not insert
    // unsanitized tags!
    if (this.shouldSanitizeTemplateContents_ ||
        cleanNode.nodeName.toLowerCase() !== 'template') {
      var dirtyNodeHTML =
          goog.html.sanitizer.HtmlSanitizer.getInnerHTML_(dirtyNode);
      var templateSpan = this.sanitizeToDomNode(dirtyNodeHTML);
      children = templateSpan.childNodes;
    } else {
      var templateDoc =
          /** @type {!HTMLTemplateElement} */ (cleanNode).content.ownerDocument;
      var dirtyCopy =
          goog.asserts.assert(templateDoc.importNode(dirtyNode, true));
      children = goog.html.sanitizer.HtmlSanitizer.getChildNodes_(dirtyCopy);
    }

    // getChildNodes_ returns null when childNodes is not a real NodeList; in
    // that case there is nothing that can safely be copied.
    if (!children) {
      return;
    }

    // appendChild with a forEach instead of an innerHTML as the latter is
    // slower and vulnerable to mXSS. Iterate over a static snapshot because
    // appendChild mutates the live list.
    goog.array.forEach(goog.array.toArray(children), function(node) {
      cleanNode.appendChild(node);
    });
  };
  /**
   * Retrieves a HtmlSanitizerPolicyContext from a dirty node given an attribute
   * name. The context's cssStyle is the node's style (read via getStyle_) when
   * the attribute is 'style', and undefined for every other attribute.
   * @param {string} attributeName
   * @param {!Node} dirtyNode
   * @return {!goog.html.sanitizer.HtmlSanitizerPolicyContext}
   * @private
   */
  goog.html.sanitizer.HtmlSanitizer.getContext_ = function(
      attributeName, dirtyNode) {
    var cssStyle = undefined;
    if (attributeName == 'style') {
      cssStyle = goog.html.sanitizer.HtmlSanitizer.getStyle_(dirtyNode);
    }
    return {cssStyle: cssStyle};
  };
  /**
   * Sanitizes a HTML string using a sanitizer with default options, i.e. one
   * produced by a freshly constructed Builder with no options changed.
   * @param {string} unsanitizedHtml
   * @return {!goog.html.SafeHtml} sanitizedHtml
   */
  goog.html.sanitizer.HtmlSanitizer.sanitize = function(unsanitizedHtml) {
    var defaultBuilder = new goog.html.sanitizer.HtmlSanitizer.Builder();
    return defaultBuilder.build().sanitize(unsanitizedHtml);
  };