struct_tree_spec.js 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. /* Copyright 2021 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import { buildGetDocumentParams } from "./test_utils.js";
  16. import { getDocument } from "../../src/display/api.js";
  17. function equalTrees(rootA, rootB) {
  18. function walk(a, b) {
  19. expect(a.role).toEqual(b.role);
  20. expect(a.lang).toEqual(b.lang);
  21. expect(a.type).toEqual(b.type);
  22. expect("children" in a).toEqual("children" in b);
  23. if (!a.children) {
  24. return;
  25. }
  26. expect(a.children.length).toEqual(b.children.length);
  27. for (let i = 0; i < rootA.children.length; i++) {
  28. walk(a.children[i], b.children[i]);
  29. }
  30. }
  31. return walk(rootA, rootB);
  32. }
  33. describe("struct tree", function () {
  34. describe("getStructTree", function () {
  35. it("parses basic structure", async function () {
  36. const filename = "structure_simple.pdf";
  37. const params = buildGetDocumentParams(filename);
  38. const loadingTask = getDocument(params);
  39. const doc = await loadingTask.promise;
  40. const page = await doc.getPage(1);
  41. const struct = await page.getStructTree();
  42. equalTrees(
  43. {
  44. role: "Root",
  45. children: [
  46. {
  47. role: "Document",
  48. lang: "en-US",
  49. children: [
  50. {
  51. role: "H1",
  52. children: [
  53. { role: "NonStruct", children: [{ type: "content" }] },
  54. ],
  55. },
  56. {
  57. role: "P",
  58. children: [
  59. { role: "NonStruct", children: [{ type: "content" }] },
  60. ],
  61. },
  62. {
  63. role: "H2",
  64. children: [
  65. { role: "NonStruct", children: [{ type: "content" }] },
  66. ],
  67. },
  68. {
  69. role: "P",
  70. children: [
  71. { role: "NonStruct", children: [{ type: "content" }] },
  72. ],
  73. },
  74. ],
  75. },
  76. ],
  77. },
  78. struct
  79. );
  80. await loadingTask.destroy();
  81. });
  82. it("parses structure with marked content reference", async function () {
  83. const filename = "issue6782.pdf";
  84. const params = buildGetDocumentParams(filename);
  85. const loadingTask = getDocument(params);
  86. const doc = await loadingTask.promise;
  87. const page = await doc.getPage(1);
  88. const struct = await page.getStructTree();
  89. equalTrees(
  90. {
  91. role: "Root",
  92. children: [
  93. {
  94. role: "Part",
  95. children: [
  96. { role: "P", children: Array(27).fill({ type: "content" }) },
  97. ],
  98. },
  99. ],
  100. },
  101. struct
  102. );
  103. await loadingTask.destroy();
  104. });
  105. });
  106. });