textextractor_test.js 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. // Copyright 2017 The Closure Library Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS-IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /**
  15. * @fileoverview Tests for the textExtractor module.
  16. */
  17. goog.module('goog.html.textExtractorTest');
  18. goog.setTestOnly();
  19. var testSuite = goog.require('goog.testing.testSuite');
  20. var textExtractor = goog.require('goog.html.textExtractor');
  21. /**
  22. * Verifies that the expected text is extracted from the HTML string.
  23. * @param {string} html The HTML string containing text mixed in HTML markup.
  24. * @param {string} expectedText The expected text extracted from the HTML
  25. * string.
  26. */
  27. function assertExtractedTextEquals(html, expectedText) {
  28. var actualText = textExtractor.extractTextContent(html);
  29. if (textExtractor.isSupported()) {
  30. assertEquals(actualText, expectedText);
  31. } else {
  32. assertEquals(actualText, '');
  33. }
  34. }
  35. testSuite({
  36. testExtractTextContent_justText: function() {
  37. var html = 'Hello';
  38. assertExtractedTextEquals(html, html);
  39. },
  40. testExtractTextContent_basic: function() {
  41. var html = '<p>Hello</p>';
  42. var expectedText = 'Hello';
  43. assertExtractedTextEquals(html, expectedText);
  44. },
  45. testExtractTextContent_removesScript: function() {
  46. var html = '<p>Foo<script>Bar</script>Baz</p>';
  47. var expectedText = 'FooBaz';
  48. assertExtractedTextEquals(html, expectedText);
  49. },
  50. testExtractTextContent_blocks: function() {
  51. var html = '<div>Foo</div><div>Bar</div>';
  52. var expectedText = 'Foo\n\nBar';
  53. assertExtractedTextEquals(html, expectedText);
  54. },
  55. testExtractTextContent_extraNewlines: function() {
  56. var html = '<p>Foo</p>\n<p>Bar</p>';
  57. var expectedText = 'Foo\n\nBar';
  58. assertExtractedTextEquals(html, expectedText);
  59. },
  60. testExtractTextContent_inline: function() {
  61. var html = '<h1>Foo<span>Bar</span></h1>';
  62. var expectedText = 'FooBar';
  63. assertExtractedTextEquals(html, expectedText);
  64. },
  65. testExtractTextContent_complex: function() {
  66. var html = '<div>\n' +
  67. ' \n' +
  68. ' A\n' +
  69. '\n' +
  70. ' mind\n' +
  71. '\n' +
  72. ' needs books<br>as a sword needs a whetstone<p>' + // no line break
  73. 'if it is to <span style="display: block">keep</span> its edge.\n' +
  74. ' </p>\n' +
  75. '\n' +
  76. '</div>';
  77. var expectedText = 'A mind needs books\n' +
  78. 'as a sword needs a whetstone\n' +
  79. 'if it is to\n' +
  80. 'keep\n' +
  81. 'its edge.';
  82. assertExtractedTextEquals(html, expectedText);
  83. },
  84. testExtractTextContent_newlines: function() {
  85. var html = 'Hello\nWorld';
  86. var expectedText = 'Hello World';
  87. assertExtractedTextEquals(html, expectedText);
  88. },
  89. testExtractTextContent_br: function() {
  90. var html = 'Hello\n<br>World';
  91. var expectedText = 'Hello\nWorld';
  92. assertExtractedTextEquals(html, expectedText);
  93. },
  94. testExtractTextContent_brAndBlock: function() {
  95. var html = 'Hello\n\n<br>\n<p>World</p>';
  96. var expectedText = 'Hello\n\nWorld';
  97. assertExtractedTextEquals(html, expectedText);
  98. }
  99. });