dom.js 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. /**
  2. * Copyright 2018 Google LLC
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy of
  6. * the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations under
  14. * the License.
  15. */
  16. import parse5 from 'parse5';
  17. import { selectAll, selectOne } from 'css-select';
  18. import treeAdapter from 'parse5-htmlparser2-tree-adapter';
  // parse5 is told to build an htmlparser2-style tree. That format is already
  // fairly DOM-like, and gets massaged into a DOM elsewhere.
  const PARSE5_OPTS = {
    treeAdapter: treeAdapter
  };
  /**
   * Parse an HTML string into a mutable, serializable Document.
   * The result is the htmlparser2-style tree produced by parse5, with basic DOM
   * accessors and mutation methods mixed in.
   * @param {String} html HTML source to parse into a Document instance
   * @returns {HTMLDocument} the parsed document
   */
  export function createDocument(html) {
    /** @type {HTMLDocument} */
    const document = parse5.parse(html, PARSE5_OPTS);
    defineProperties(document, DocumentExtensions);

    // A throwaway element gives access to the node constructor and prototype.
    const probe = document.createElement('div');
    const elementPrototype = Object.getPrototypeOf(probe);

    // Keep a reference to the node constructor - createTextNode() needs it.
    document.$$Node = probe.constructor;

    // Mix the DOM manipulation methods into the element prototype.
    defineProperties(elementPrototype, ElementExtensions);
    // NOTE(review): assigned on the prototype rather than on each node, so this
    // presumably reflects the most recently created document - confirm.
    elementPrototype.ownerDocument = document;

    return document;
  }
  /**
   * Turn a Document back into an HTML String.
   * @param {HTMLDocument} document A Document, such as one created via `createDocument()`
   * @returns {String} the serialized HTML
   */
  export function serializeDocument(document) {
    const html = parse5.serialize(document, PARSE5_OPTS);
    return html;
  }
  /** @typedef {treeAdapter.Document & typeof DocumentExtensions} HTMLDocument */
  /**
   * Methods and descriptors to mix into Element.prototype.
   * Function values become methods; object values are used as property
   * descriptors (see `defineProperties()`).
   * @private
   */
  const ElementExtensions = {
    /** @extends treeAdapter.Element.prototype */

    // Upper-cased tag name.
    nodeName: {
      get() {
        return this.tagName.toUpperCase();
      }
    },

    // `id` and `className` are aliases for the `id` and `class` attributes.
    id: reflectedProperty('id'),

    className: reflectedProperty('class'),

    /**
     * Insert `child` before `referenceNode`, or append it when no reference
     * node is given.
     * @returns the inserted child
     */
    insertBefore(child, referenceNode) {
      if (!referenceNode) return this.appendChild(child);
      treeAdapter.insertBefore(this, child, referenceNode);
      return child;
    },

    /**
     * Append `child` to this element.
     * @returns the appended child
     */
    appendChild(child) {
      treeAdapter.appendChild(this, child);
      return child;
    },

    /**
     * Detach `child` from the tree.
     * @returns the removed child, consistent with appendChild()/insertBefore()
     */
    removeChild(child) {
      treeAdapter.detachNode(child);
      return child;
    },

    /** Detach this element from the tree. */
    remove() {
      treeAdapter.detachNode(this);
    },

    textContent: {
      get() {
        return getText(this);
      },
      set(text) {
        // Drop all existing children, then insert the new text.
        this.children = [];
        treeAdapter.insertText(this, text);
      }
    },

    /**
     * Set an attribute. A null/undefined value is stored as an empty string.
     */
    setAttribute(name, value) {
      if (this.attribs == null) this.attribs = {};
      if (value == null) value = '';
      this.attribs[name] = value;
    },

    /** Remove an attribute if present. */
    removeAttribute(name) {
      if (this.attribs != null) {
        delete this.attribs[name];
      }
    },

    /**
     * Read an attribute value.
     * @returns the stored value, or `null` when the attribute (or the whole
     *   attribute map) is absent. Never returns `false`, so the result can be
     *   safely compared against null.
     */
    getAttribute(name) {
      if (this.attribs == null) return null;
      const value = this.attribs[name];
      return value == null ? null : value;
    },

    /** @returns {boolean} whether the attribute is present */
    hasAttribute(name) {
      return this.attribs != null && this.attribs[name] != null;
    },

    /**
     * Minimal stand-in for an attribute node.
     * @returns `{ specified: true, value }` when the attribute exists, else `null`
     */
    getAttributeNode(name) {
      const value = this.getAttribute(name);
      return value == null ? null : { specified: true, value };
    }
  };
  /**
   * Collect the top-level <html> element(s) of a document.
   * The query methods pass this whole list on as the search context, so selector
   * matching is unaffected by `documentElement` returning a single node.
   * @private
   */
  function findHtmlElements(document) {
    return document.childNodes.filter(
      (child) => String(child.tagName).toLowerCase() === 'html'
    );
  }

  /**
   * Methods and descriptors to mix into the global document instance.
   * Function values become methods; object values are used as property
   * descriptors (see `defineProperties()`).
   * @private
   */
  const DocumentExtensions = {
    /** @extends treeAdapter.Document.prototype */

    // document is just an Element in htmlparser2, giving it a nodeType of ELEMENT_NODE.
    // TODO: verify if these are needed for css-select
    nodeType: {
      get() {
        return 9;
      }
    },

    contentType: {
      get() {
        return 'text/html';
      }
    },

    nodeName: {
      get() {
        return '#document';
      }
    },

    documentElement: {
      get() {
        // The first <html> element within the document, or null if there is none.
        // (Previously this returned the whole filtered array.)
        const roots = findHtmlElements(this);
        return roots.length > 0 ? roots[0] : null;
      }
    },

    compatMode: {
      get() {
        // Map the tree adapter's document mode onto the DOM compatMode strings.
        // Any other mode yields undefined.
        const compatMode = {
          'no-quirks': 'CSS1Compat',
          quirks: 'BackCompat',
          'limited-quirks': 'CSS1Compat'
        };
        return compatMode[treeAdapter.getDocumentMode(this)];
      }
    },

    head: {
      get() {
        return this.querySelector('head');
      }
    },

    body: {
      get() {
        return this.querySelector('body');
      }
    },

    /** Create a detached element with the given tag name and no attributes. */
    createElement(name) {
      return treeAdapter.createElement(name, null, []);
    },

    /** Create a detached text node holding `text`. */
    createTextNode(text) {
      // there is no dedicated createTextNode equivalent exposed in htmlparser2's DOM
      const Node = this.$$Node;
      return new Node({
        type: 'text',
        data: text,
        parent: null,
        prev: null,
        next: null
      });
    },

    /** @returns whatever `selectOne` finds for `sel` in the <html> element list */
    querySelector(sel) {
      return selectOne(sel, findHtmlElements(this));
    },

    /** @returns the `selectAll` result for `sel` in the <html> element list */
    querySelectorAll(sel) {
      // NOTE(review): for ':root' this returns the document itself rather than a
      // list; kept as-is because callers may depend on it - confirm.
      if (sel === ':root') {
        return this;
      }
      return selectAll(sel, findHtmlElements(this));
    }
  };
  /**
   * Essentially `Object.defineProperties()`, except function values are assigned
   * as value descriptors for convenience.
   * Only the own enumerable keys of `properties` are applied; inherited keys are
   * ignored so that a stray non-descriptor value on a prototype cannot make
   * `Object.defineProperty` throw.
   * @param {Object} obj target to define the properties on
   * @param {Object} properties map of name to function or property descriptor
   * @private
   */
  function defineProperties(obj, properties) {
    for (const key of Object.keys(properties)) {
      const value = properties[key];
      const descriptor = typeof value === 'function' ? { value } : value;
      Object.defineProperty(obj, key, descriptor);
    }
  }
  /**
   * Build a property descriptor whose getter and setter forward to
   * `getAttribute()` / `setAttribute()` for the given attribute name.
   * @param {String} attributeName name of the attribute to alias
   * @returns {{get: Function, set: Function}} accessor descriptor
   * @private
   */
  function reflectedProperty(attributeName) {
    const descriptor = {
      get: function () {
        return this.getAttribute(attributeName);
      },
      set: function (value) {
        this.setAttribute(attributeName, value);
      }
    };
    return descriptor;
  }
  /**
   * Compute the text content of a node or of a list of nodes.
   * Lists are concatenated, `<br>` elements count as a newline, other elements
   * contribute the text of their children, text nodes contribute their data and
   * anything else contributes nothing.
   * https://github.com/fb55/domutils/blob/master/src/stringify.ts#L21
   * @private
   */
  function getText(node) {
    if (Array.isArray(node)) {
      const pieces = [];
      node.forEach((item) => {
        pieces.push(getText(item));
      });
      return pieces.join('');
    }

    if (treeAdapter.isElementNode(node)) {
      if (node.name === 'br') return '\n';
      return getText(node.children);
    }

    return treeAdapter.isTextNode(node) ? node.data : '';
  }