Repository: clj-commons/hickory Branch: master Commit: 051c94c552b1 Files: 30 Total size: 172.3 KB Directory structure: gitextract_ux8kf796/ ├── .circleci/ │ └── config.yml ├── .clj-kondo/ │ ├── babashka/ │ │ └── fs/ │ │ └── config.edn │ └── rewrite-clj/ │ └── rewrite-clj/ │ └── config.edn ├── .github/ │ ├── CODEOWNERS │ └── workflows/ │ └── ci.yml ├── .gitignore ├── API.md ├── LICENSE ├── ORIGINATOR ├── README.md ├── bb.edn ├── deps.edn ├── package.json ├── project.clj ├── src/ │ ├── clj/ │ │ └── hickory/ │ │ └── core.clj │ ├── cljc/ │ │ └── hickory/ │ │ ├── convert.cljc │ │ ├── hiccup_utils.cljc │ │ ├── render.cljc │ │ ├── select.cljc │ │ ├── utils.cljc │ │ └── zip.cljc │ └── cljs/ │ └── hickory/ │ └── core.cljs └── test/ ├── cljc/ │ └── hickory/ │ └── test/ │ ├── convert.cljc │ ├── core.cljc │ ├── hiccup_utils.cljc │ ├── render.cljc │ ├── select.cljc │ └── zip.cljc └── cljs/ └── hickory/ ├── advanced.edn └── doo_runner.cljs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .circleci/config.yml ================================================ # Clojure CircleCI 2.0 configuration file # # Check https://circleci.com/docs/2.0/language-clojure/ for more details # version: 2.1 workflows: build-deploy: jobs: - build: filters: tags: only: /.*/ - deploy: requires: - build filters: tags: only: /Release-.*/ context: - CLOJARS_DEPLOY jobs: build: docker: # specify the version you desire here - image: cimg/clojure:1.11.1-browsers # Specify service dependencies here if necessary # CircleCI maintains a library of pre-built images # documented at https://circleci.com/docs/2.0/circleci-images/ # - image: circleci/postgres:9.4 working_directory: ~/repo environment: LEIN_ROOT: "true" # Customize the JVM maximum heap limit JVM_OPTS: -Xmx3200m steps: - checkout # Download and cache dependencies - restore_cache: keys: - v1-dependencies-{{ checksum "project.clj" }} # fallback to using the latest cache if no exact match is found - v1-dependencies- - run: name: Install babashka command: | curl -s https://raw.githubusercontent.com/borkdude/babashka/master/install -o install.sh sudo bash install.sh rm install.sh - run: name: install karma command: | sudo npm install -g karma-cli sudo npm install - run: lein deps - save_cache: paths: - ~/.m2 key: v1-dependencies-{{ checksum "project.clj" }} # run tests! - run: bb test:clj - run: bb test:cljs deploy: docker: - image: cimg/clojure:1.11.1-browsers working_directory: ~/repo environment: LEIN_ROOT: "true" # Customize the JVM maximum heap limit JVM_OPTS: -Xmx3200m steps: - checkout # Download and cache dependencies - restore_cache: keys: - v1-dependencies-{{ checksum "project.clj" }} # fallback to using the latest cache if no exact match is found - v1-dependencies- # Download and cache dependencies - restore_cache: keys: - v1-dependencies-{{ checksum "project.clj" }} # fallback to using the latest cache if no exact match is found - v1-dependencies- - run: name: Install babashka command: | curl -s https://raw.githubusercontent.com/borkdude/babashka/master/install -o install.sh sudo bash install.sh rm install.sh - run: name: Install deployment-script command: | curl -s https://raw.githubusercontent.com/clj-commons/infra/main/deployment/circle-maybe-deploy.bb -o circle-maybe-deploy.bb chmod a+x circle-maybe-deploy.bb - run: lein deps - run: name: Setup GPG signing key command: | GNUPGHOME="$HOME/.gnupg" export GNUPGHOME mkdir -p "$GNUPGHOME" chmod 0700 "$GNUPGHOME" echo "$GPG_KEY" \ | base64 --decode --ignore-garbage \ | gpg --batch --allow-secret-key-import --import gpg --keyid-format LONG --list-secret-keys - save_cache: paths: - ~/.m2 key: v1-dependencies-{{ checksum "project.clj" }} - run: name: Deploy command: | GPG_TTY=$(tty) export GPG_TTY echo $GPG_TTY ./circle-maybe-deploy.bb lein deploy clojars ================================================ FILE: .clj-kondo/babashka/fs/config.edn ================================================ {:lint-as {babashka.fs/with-temp-dir clojure.core/let}} ================================================ FILE: .clj-kondo/rewrite-clj/rewrite-clj/config.edn ================================================ {:lint-as {rewrite-clj.zip/subedit-> clojure.core/-> rewrite-clj.zip/subedit->> clojure.core/->> rewrite-clj.zip/edit-> clojure.core/-> rewrite-clj.zip/edit->> clojure.core/->>}} ================================================ FILE: .github/CODEOWNERS ================================================ * @port19x ================================================ FILE: .github/workflows/ci.yml ================================================ name: ci on: [push, pull_request] jobs: clojure: strategy: matrix: os: [ubuntu-latest] runs-on: ${{ matrix.os }} steps: - name: Checkout uses: actions/checkout@v3 # It is important to install java before installing clojure tools which needs java # exclusions: babashka, clj-kondo and cljstyle - name: Prepare java uses: actions/setup-java@v3 with: distribution: 'zulu' java-version: '8' - name: Install clojure tools uses: DeLaGuardo/setup-clojure@10.0 with: bb: latest lein: latest # Optional step: - name: Cache clojure dependencies uses: actions/cache@v3 with: path: | ~/.m2/repository ~/.gitlibs ~/.deps.clj # List all files containing dependencies: key: cljdeps-${{ hashFiles('deps.edn') }} # key: cljdeps-${{ hashFiles('deps.edn', 'bb.edn') }} # key: cljdeps-${{ hashFiles('project.clj') }} # key: cljdeps-${{ hashFiles('build.boot') }} restore-keys: cljdeps- - name: Execute clj tests run: bb test:clj - name: Execute cljs tests run: | bb test:cljs-npm-install bb test:cljs -c test/cljs/hickory/advanced.edn ================================================ FILE: .gitignore ================================================ /target /lib /classes /checkouts /codox-out pom.xml *.jar *.class .lein-deps-sum .lein-failures .lein-plugins .lein-repl-history out/ .cpcache .cache .clj-kondo/.cache .lsp/.cache .portal/vs-code.edn cljs-test-runner-out node_modules package-lock.json ================================================ FILE: API.md ================================================ # Table of contents - [`hickory.core`](#hickory.core) - [`Attribute`](#hickory.core/attribute) - [`Comment`](#hickory.core/comment) - [`Document`](#hickory.core/document) - [`DocumentType`](#hickory.core/documenttype) - [`Element`](#hickory.core/element) - [`HiccupRepresentable`](#hickory.core/hiccuprepresentable) - Objects that can be represented as Hiccup nodes implement this protocol in order to make the conversion. - [`HickoryRepresentable`](#hickory.core/hickoryrepresentable) - Objects that can be represented as HTML DOM node maps, similar to clojure.xml, implement this protocol to make the conversion. - [`Text`](#hickory.core/text) - [`as-hiccup`](#hickory.core/as-hiccup) - Converts the node given into a hiccup-format data structure. - [`as-hickory`](#hickory.core/as-hickory) - Converts the node given into a hickory-format data structure. - [`extract-doctype`](#hickory.core/extract-doctype) - [`format-doctype`](#hickory.core/format-doctype) - [`node-type`](#hickory.core/node-type) - [`parse`](#hickory.core/parse) - Parse an entire HTML document into a DOM structure that can be used as input to as-hiccup or as-hickory. - [`parse-dom-with-domparser`](#hickory.core/parse-dom-with-domparser) - [`parse-dom-with-write`](#hickory.core/parse-dom-with-write) - Parse an HTML document (or fragment) as a DOM using document.implementation.createHTMLDocument and document.write. - [`parse-fragment`](#hickory.core/parse-fragment) - Parse an HTML fragment (some group of tags that might be at home somewhere in the tag hierarchy under ) into a list of DOM elements that can each be passed as input to as-hiccup or as-hickory. - [`remove-el`](#hickory.core/remove-el) ----- # hickory.core ## `Attribute`

Source

## `Comment`

Source

## `Document`

Source

## `DocumentType`

Source

## `Element`

Source

## `HiccupRepresentable` Objects that can be represented as Hiccup nodes implement this protocol in order to make the conversion.

Source

## `HickoryRepresentable` Objects that can be represented as HTML DOM node maps, similar to clojure.xml, implement this protocol to make the conversion. Each DOM node will be a map or string (for Text/CDATASections). Nodes that are maps have the appropriate subset of the keys :type - [:comment, :document, :document-type, :element] :tag - node's tag, check :type to see if applicable :attrs - node's attributes as a map, check :type to see if applicable :content - node's child nodes, in a vector, check :type to see if applicable

Source

## `Text`

Source

## `as-hiccup` ``` clojure (as-hiccup this) ``` Function. Converts the node given into a hiccup-format data structure. The node must have an implementation of the HiccupRepresentable protocol; nodes created by parse or parse-fragment already do.

Source

## `as-hickory` ``` clojure (as-hickory this) ``` Function. Converts the node given into a hickory-format data structure. The node must have an implementation of the HickoryRepresentable protocol; nodes created by parse or parse-fragment already do.

Source

## `extract-doctype` ``` clojure (extract-doctype s) ``` Function.

Source

## `format-doctype` ``` clojure (format-doctype dt) ``` Function.

Source

## `node-type` ``` clojure (node-type type) ``` Function.

Source

## `parse` ``` clojure (parse s) ``` Function. Parse an entire HTML document into a DOM structure that can be used as input to as-hiccup or as-hickory. ```klipse (-> (parse "foo

Hello

") as-hiccup) ``` ```klipse (-> (parse "foo

Hello

") as-hickory) ```

Source

## `parse-dom-with-domparser` ``` clojure (parse-dom-with-domparser s) ``` Function.

Source

## `parse-dom-with-write` ``` clojure (parse-dom-with-write s) ``` Function. Parse an HTML document (or fragment) as a DOM using document.implementation.createHTMLDocument and document.write.

Source

## `parse-fragment` ``` clojure (parse-fragment s) ``` Function. Parse an HTML fragment (some group of tags that might be at home somewhere in the tag hierarchy under ) into a list of DOM elements that can each be passed as input to as-hiccup or as-hickory.

Source

## `remove-el` ``` clojure (remove-el el) ``` Function.

Source

================================================ FILE: LICENSE ================================================ Source code distributed under the Eclipse Public License - v 1.0: THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 1. DEFINITIONS "Contribution" means: a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and b) in the case of each subsequent Contributor: i) changes to the Program, and ii) additions to the Program; where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program. "Contributor" means any person or entity that distributes the Program. "Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program. "Program" means the Contributions distributed in accordance with this Agreement. "Recipient" means anyone who receives the Program under this Agreement, including all Contributors. 2. GRANT OF RIGHTS a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form. b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program. d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. 3. REQUIREMENTS A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that: a) it complies with the terms and conditions of this Agreement; and b) its license agreement: i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. When the Program is made available in source code form: a) it must be made available under this Agreement; and b) a copy of this Agreement must be included with each copy of the Program. Contributors may not remove or alter any copyright notices contained within the Program. Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution. 4. COMMERCIAL DISTRIBUTION Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor tocontrol, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense. For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages. 5. NO WARRANTY EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations. 6. DISCLAIMER OF LIABILITY EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 7. GENERAL If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed. All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive. Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved. This Agreement is governed by the laws of the State of Washington and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation. ================================================ FILE: ORIGINATOR ================================================ @davidsantiago ================================================ FILE: README.md ================================================ [![Clojars Project](https://img.shields.io/clojars/v/org.clj-commons/hickory.svg)](https://clojars.org/org.clj-commons/hickory) [![cljdoc badge](https://cljdoc.org/badge/org.clj-commons/hickory)](https://cljdoc.org/d/org.clj-commons/hickory) [![CircleCI](https://circleci.com/gh/clj-commons/hickory.svg?style=svg)](https://circleci.com/gh/clj-commons/hickory) # Hickory Hickory parses HTML into Clojure data structures, so you can analyze, transform, and output back to HTML. HTML can be parsed into [hiccup](http://github.com/weavejester/hiccup) vectors, or into a map-based DOM-like format very similar to that used by clojure.xml. It can be used from both Clojure and Clojurescript. ## Usage ### Parsing To start, you will want to process your HTML into a parsed representation. Once the HTML is in this form, it can be converted to either Hiccup or Hickory format for further processing. There are two parsing functions, `parse` and `parse-fragment`. Both take a string containing HTML and return the parser objects representing the document. (It happens that these parser objects are Jsoup Documents and Nodes, but I do not consider this to be an aspect worth preserving if a change in parser should become necessary). The first function, `parse` expects an entire HTML document, and parses it using an HTML5 parser ([Jsoup](http://jsoup.org) on Clojure and the browser's DOM parser in Clojurescript), which will fix up the HTML as much as it can into a well-formed document. The second function, `parse-fragment`, expects some smaller fragment of HTML that does not make up a full document, and thus returns a list of parsed fragments, each of which must be processed individually into Hiccup or Hickory format. For example, if `parse-fragment` is given "`


`" as input, it has no common parent for them, so it must simply give you the list of nodes that it parsed. These parsed objects can be turned into either Hiccup vector trees or Hickory DOM maps using the functions `as-hiccup` or `as-hickory`. Here's a usage example. ```clojure user=> (use 'hickory.core) nil user=> (def parsed-doc (parse "foo")) #'user/parsed-doc user=> (as-hiccup parsed-doc) ([:html {} [:head {}] [:body {} [:a {:href "foo"} "foo"]]]) user=> (as-hickory parsed-doc) {:type :document, :content [{:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]}]}]}]} user=> (def parsed-frag (parse-fragment "foo bar")) #'user/parsed-frag user=> (as-hiccup parsed-frag) IllegalArgumentException No implementation of method: :as-hiccup of protocol: #'hickory.core/HiccupRepresentable found for class: clojure.lang.PersistentVector clojure.core/-cache-protocol-fn (core_deftype.clj:495) user=> (map as-hiccup parsed-frag) ([:a {:href "foo"} "foo"] " " [:a {:href "bar"} "bar"]) user=> (map as-hickory parsed-frag) ({:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]} " " {:type :element, :attrs {:href "bar"}, :tag :a, :content ["bar"]}) ``` In the example above, you can see an HTML document that is parsed once and then converted to both Hiccup and Hickory formats. Similarly, a fragment is parsed, but it cannot be directly used with `as-hiccup` (or `as-hickory`), it must have those functions called on each element in the list instead. The namespace `hickory.zip` provides [zippers](https://clojure.github.io/clojure/clojure.zip-api.html) for both Hiccup and Hickory formatted data, with the functions `hiccup-zip` and `hickory-zip`. Using zippers, you can easily traverse the trees in any order you desire, make edits, and get the resulting tree back. Here is an example of that. ```clojure user=> (use 'hickory.zip) nil user=> (require '[clojure.zip :as zip]) nil user=> (require '[hickory.render :refer [hickory-to-html]]) nil user=> (-> (hiccup-zip (as-hiccup (parse "bar
"))) zip/node) ([:html {} [:head {}] [:body {} [:a {:href "foo"} "bar" [:br {}]]]]) user=> (-> (hiccup-zip (as-hiccup (parse "bar
"))) zip/next zip/node) [:html {} [:head {}] [:body {} [:a {:href "foo"} "bar" [:br {}]]]] user=> (-> (hiccup-zip (as-hiccup (parse "bar
"))) zip/next zip/next zip/node) [:head {}] user=> (-> (hiccup-zip (as-hiccup (parse "bar
"))) zip/next zip/next (zip/replace [:head {:id "a"}]) zip/node) [:head {:id "a"}] user=> (-> (hiccup-zip (as-hiccup (parse "bar
"))) zip/next zip/next (zip/replace [:head {:id "a"}]) zip/root) ([:html {} [:head {:id "a"}] [:body {} [:a {:href "foo"} "bar" [:br {}]]]]) user=> (-> (hickory-zip (as-hickory (parse "bar
"))) zip/next zip/next (zip/replace {:type :element :tag :head :attrs {:id "a"} :content nil}) zip/root) {:type :document, :content [{:type :element, :attrs nil, :tag :html, :content [{:content nil, :type :element, :attrs {:id "a"}, :tag :head} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs {:href "foo"}, :tag :a, :content ["bar" {:type :element, :attrs nil, :tag :br, :content nil}]}]}]}]} user=> (hickory-to-html *1) "bar
" ``` In this example, we can see a basic document being parsed into Hiccup form. Then, using zippers, the HEAD element is navigated to, and then replaced with one that has an id of "a". The final tree, including the modification, is also shown using `zip/root`. Then the same modification is made using Hickory forms and zippers. Finally, the modified Hickory version is printed back to HTML using the `hickory-to-html` function. ### Selectors Hickory also comes with a set of CSS-style selectors that operate on hickory-format data in the `hickory.select` namespace. These selectors do not exactly mirror the selectors in CSS, and are often more powerful. There is no version of these selectors for hiccup-format data, at this point. A selector is simply a function that takes a zipper loc from a hickory html tree data structure as its only argument. The selector will return its argument if the selector applies to it, and nil otherwise. Writing useful selectors can often be involved, so most of the `hickory.select` package is actually made up of selector combinators; functions that return useful selector functions by specializing them to the data given as arguments, or by combining together multiple selectors. For example, if we wanted to figure out the dates of the next Formula 1 race weekend, we could do something like this: ```clojure user=> (use 'hickory.core) nil user=> (require '[hickory.select :as s]) nil user=> (require '[clj-http.client :as client]) nil user=> (require '[clojure.string :as string]) nil user=> (def site-htree (-> (client/get "http://formula1.com/default.html") :body parse as-hickory)) #'user/site-htree user=> (-> (s/select (s/child (s/class "subCalender") ; sic (s/tag :div) (s/id :raceDates) s/first-child (s/tag :b)) site-htree) first :content first string/trim) "10, 11, 12 May 2013" ``` In this example, we get the contents of the homepage and use `select` to give us any nodes that satisfy the criteria laid out by the selectors. The selector in this example is overly precise in order to illustrate more selectors than we need; we could have gotten by just selecting the contents of the P and then B tags inside the element with id "raceDates". Using the selectors allows you to search large HTML documents for nodes of interest with a relatively small amount of code. There are many selectors available in the [`hickory.select`](https://cljdoc.org/d/org.clj-commons/hickory/CURRENT/api/hickory.select) namespace, including: - `node-type`: Give this function a keyword or string that names the contents of the `:type` field in a hickory node, and it gives you a selector that will select nodes of that type. Example: `(node-type :comment)` - `tag`: Give this function a keyword or string that names the contents of the `:tag` field in a hickory node, and it gives you a selector that will select nodes with that tag. Example: `(tag :div)` - `attr`: Give this function a keyword or string that names an attribute in the `:attrs` map of a hickory node, and it gives you a selector that will select nodes whose `:attrs` map contains that key. Give a single-argument function as an additional argument, and the resulting selector function will additionally require the value of that key to be such that the function given as the last argument returns true. Example: `(attr :id #(.startsWith % "foo"))` - `id`: Give this function a keyword or string that names the `:id` attribute in the `:attrs` map and it will return a selector function that selects nodes that have that id (this comparison is case-insensitive). Example: `(id :raceDates)` - `class`: Give this function a keyword or string that names a class that the node should have in the `:class` attribute in the `:attrs` map, and it will return a function that selects nodes that have the given class somewhere in their class string. Example: `(class :foo)` - `any`: This selector takes no arguments, do not invoke it; returns any node that is an element, similarly to CSS's '*' selector. - `element`: This selector is equivalent to the `any` selector; this alternate name can make it clearer when the intention is to exclude non-element nodes from consideration. - `root`: This selector takes no arguments and should not be invoked; simply returns the root node (the HTML element). - `n-moves-until`: This selector returns a selector function that selects its argument if that argument is some distance from a boundary. The first two arguments, `n` and `c` define the counting: it only selects nodes whose distance can be written in the form `nk+c` for some natural number `k`. The distance and boundary are defined by the number of times the zipper-movement function in the third argument is applied before the boundary function in the last argument is true. See doc string for details. - `nth-of-type`: This selector returns a selector function that selects its argument if that argument is the `(nk+c)`'th child of the given tag type of some parent node for some natural `k`. Optionally, instead of the `n` and `c` arguments, the keywords `:odd` and `:even` can be given. - `nth-last-of-type`: Just like `nth-of-type` but counts backwards from the last sibling. - `nth-child`: This selector returns a selector function that selects its argument if that argument is the `(nk+c)`'th child of its parent node for some natural `k`. Instead of the `n` and `c` arguments, the keywords `:odd` and `:even` can be given. - `nth-last-child`: Just like `nth-last-child` but counts backwards from the last sibling. - `first-child`: Takes no arguments, do not invoke it; equivalent to `(nth-child 1)`. - `last-child`: Takes no arguments, do not invoke it; equivalent to `(nth-last-child 1)`. There are also selector combinators, which take as argument some number of other selectors, and return a new selector that combines them into one larger selector. An example of this is the `child` selector in the example above. Here's a list of some selector combinators in the package (see the [API Documentation](https://cljdoc.org/d/org.clj-commons/hickory) for the full list): - `and`: Takes any number of selectors, and returns a selector that only selects nodes for which all of the argument selectors are true. - `or`: Takes any number of selectors, and retrurns a selector that only selects nodes for which at least one of the argument selectors are true. - `not`: Takes a single selector as argument and returns a selector that only selects nodes that its argument selector does not. - `el-not`: Takes a single selector as argument and returns a selector that only selects element nodes that its argument selector does not. - `child`: Takes any number of selectors as arguments and returns a selector that returns true when the zipper location given as the argument is at the end of a chain of direct child relationships specified by the selectors given as arguments. - `descendant`: Takes any number of selectors as arguments and returns a selector that returns true when the zipper location given as the argument is at the end of a chain of descendant relationships specified by the selectors given as arguments. We can illustrate the selector combinators by continuing the Formula 1 example above. We suspect, to our dismay, that Sebastian Vettel is leading the championship for the fourth year in a row. ```clojure user=> (-> (s/select (s/descendant (s/class "subModule") (s/class "standings") (s/and (s/tag :tr) s/first-child) (s/and (s/tag :td) (s/nth-child 2)) (s/tag :a)) site-htree) first :content first string/trim) "Sebastian Vettel" ``` Our fears are confirmed, Sebastian Vettel is well on his way to a fourth consecutive championship. If you were to inspect the page by hand (as of around May 2013, at least), you would see that unlike the `child` selector we used in the example above, the `descendant` selector allows the argument selectors to skip stages in the tree; we've left out some elements in this descendant relationship. The first table row in the driver standings table is selected with the `and`, `tag` and `first-child` selectors, and then the second `td` element is chosen, which is the element that has the driver's name (the first table element has the driver's standing) inside an `A` element. All of this is dependent on the exact layout of the HTML in the site we are examining, of course, but it should give an idea of how you can combine selectors to reach into a specific node of an HTML document very easily. Finally, it's worth noting that the `select` function itself returns the hickory zipper nodes it finds. This is most useful for analyzing the contents of nodes. However, sometimes you may wish to examine the area around a node once you've found it. For this, you can use the `select-locs` function, which returns a sequence of hickory zipper locs, instead of the nodes themselves. This will allow you to navigate around the document tree using the zipper functions in `clojure.zip`. If you wish to go further and actually modify the document tree using zipper functions, you should not use `select-locs`. The problem is that it returns a bunch of zipper locs, but once you modify one, the others are out of date and do not see the changes (just as with any other persistent data structure in Clojure). Thus, their presence was useless and possibly confusing. Instead, you should use the `select-next-loc` function to walk through the document tree manually, moving through the locs that satisfy the selector function one by one, which will allow you to make modifications as you go. As with modifying any data structure as you traverse it, you must still be careful that your code does not add the thing it is selecting for, or it could get caught in an infinite loop. Finally, for more specialized selection needs, it should be possible to write custom selection functions that use the selectors and zipper functions without too much work. The functions discussed in this paragraph are very short and simple, you can use them as a guide. The doc strings for the functions in the [`hickory.select`](https://cljdoc.org/d/org.clj-commons/hickory/CURRENT/api/hickory.select) namespace provide more details on most of these functions. For more details, see the [API Documentation](https://cljdoc.org/d/org.clj-commons/hickory/). ## Hickory format Why two formats? It's very easy to see in the example above, Hiccup is very convenient to use for writing HTML. It has a compact syntax, with CSS-like shortcuts for specifying classes and ids. It also allows parts of the vector to be skipped if they are not important. It's a little bit harder to process data in Hiccup format. First of all, each form has to be checked for the presence of the attribute map, and the traversal adjusted accordingly. Raw Hiccup vectors might also have information about class and id in one of two different places. Finally, not every piece of an HTML document can be expressed in Hiccup without resorting to writing HTML in strings. For example, if you want to put a doctype or comment on your document, it has to be done as a string in your Hiccup form containing "``" or "``". The Hickory format is another data format intended to allow a roundtrip from HTML as text, into a data structure that is easy to process and modify, and back into equivalent (but not identical, in general) HTML. Because it can express all parts of an HTML document in a parsed form, it is easier to search and modify the structure of the document. A Hickory node is either a map or a string. If it is a map, it will have some subset of the following four keys, depending on the `:type`: - `:type` - This will be one of `:comment`, `:document`, `:document-type`, `:element` - `:tag` - A node's tag (for example, `:img`). This will only be present for nodes of type `:element`. - `:attrs` - A node's attributes, as a map of keywords to values (for example, {:href "/a"}). This will only be present for nodes of type `:element`. - `:content` - A node's child nodes, in a vector. Only `:comment`, `:document`, and `:element` nodes have children. Text and CDATA nodes are represented as strings. This is almost the exact same structure used by [clojure.xml](https://clojure.github.io/clojure/clojure.xml-api.html), the only difference being the addition of the `:type` field. Having this field allows us to process nodes that clojure.xml leaves out of the parsed data, like doctype and comments. ## Obtaining To get hickory, add ```clojure [org.clj-commons/hickory "0.7.3"] ``` to your project.clj, or an equivalent entry for your Maven-compatible build tool. ## ClojureScript support Hickory works for all web browsers IE9+ (you can find a workaround for IE9 [here](http://stackoverflow.com/questions/9250545/javascript-domparser-access-innerhtml-and-other-properties)). ## Nodejs support To parse markup on Nodejs, Hickory requires a Node DOM implementation. Several are available from [npm](https://www.npmjs.com). Install the npm package or use [lein-npm](https://github.com/RyanMcG/lein-npm). Here are some alternatives: - [jsdom](https://www.npmjs.com/package/jsdom) - **Caution:** this will not work if you're using figwheel ```clojure (set! js/document (.jsdom (cljs.nodejs/require "jsdom"))) ``` - [xmldom](https://www.npmjs.com/package/xmldom) ```clojure (set! js/DOMParser (.-DOMParser (cljs.nodejs/require "xmldom"))) ``` ## Changes - Version 0.7.1. Thanks to [Matt Grimm](https://github.com/tkocmathla) for adding the up-pred zipper function. - Version 0.7.0. Thanks to [Ricardo J. Méndez](https://github.com/ricardojmendez) for the following updates. * Removed dependency on cljx, since it was deprecated in June 2015. * Converted all files and conditionals to cljc. * Moved tests to cljs.test with doo, since cemerick.test was deprecated over a year ago. * Updated Clojure and ClojureScript dependencies to avoid conflicts. * Updated JSoup to 1.9.2, which should bring improved parsing performance. - Released version 0.6.0. * Updated JSoup to version 1.8.3. This version of JSoup contains bug fixes, but slightly changes the way it handles HTML: some parses and output might have different case than before. HTML is still case-insensitive, of course, but Hickory minor version has been increased just in case. API and semantics are otherwise unchanged. - Released version 0.5.4. * Fixed project dependencies so ClojureScript is moved to a dev-dependency. - Released version 0.5.3. * Minor bug fix to accommodate ClojureScript's new type hinting support. - Released version 0.5.2. * Updates the Clojurescript version to use the latest version of Clojurescript (0.0-1934). - Released version 0.5.1. * Added `has-child` and `has-descendant` selectors. Be careful with `has-descendant`, as it must do a full subtree search on each node, which is not fast. - Released version 0.5.0. * Now works in Clojurescript as well, huge thanks to [Julien Eluard](https://github.com/jeluard) for doing the heavy lifting on this. * Reorganized parts of the API into more granular namespaces for better organization. * Added functions to convert between Hiccup and Hickory format; note that this conversion is not always exact or roundtripable, and can cause a full HTML reparse. * Added new selector, `element-child`, which selects element nodes that are the child of another element node. * Numerous bug fixes and improvements. - Released version 0.4.1, which adds a number of new selectors and selector combinators, including `find-in-text`, `precede-adjacent`, `follow-adjacent`, `precede` and `follow`. - Released version 0.4.0. Adds the `hickory.select` namespace with many helpful functions for searching through hickory-format HTML documents for specific nodes. - Released version 0.3.0. Provides a more helpful error message when hickory-to-html has an error. Now requires Clojure 1.4. - Released version 0.2.3. Fixes a bug where hickory-to-html was not html-escaping the values of tag attributes. - Released version 0.2.2. Fixes a bug where hickory-to-html was improperly html-escaping the contents of script/style tags. - Released version 0.2.1. This version fixes bugs: * hickory-to-html now properly escapes text nodes * text nodes will now preserve whitespace correctly - Released version 0.2.0. This version adds a second parsed data format, explained above. To support this, the API for `parse` and `parse-fragment` has been changed to allow their return values to be passed to functions `as-hiccup` or `as-hickory` to determine the final format. Also added are zippers for both Hiccup and Hickory formats. ## License Copyright © 2012 David Santiago Distributed under the Eclipse Public License, the same as Clojure. ================================================ FILE: bb.edn ================================================ {:deps {io.github.borkdude/lein2deps {:git/url "https://github.com/borkdude/lein2deps" :git/sha "e26edeb114c9d88a5c4d3abb683306588fcaad13"}} :tasks {:requires ([babashka.fs :as fs]) test:clj (shell "lein test") test:cljs-npm-install (do (shell "npm install -g karma-cli") (shell "npm install")) test:cljs (do (fs/delete-tree "cljs-test-runner-out") (apply clojure "-M:test:cljs:cljs-test-runner" "--env" "chrome-headless" *command-line-args*)) repl:cljs-node (shell "rlwrap bb clojure -M:test:cljs -m cljs.main -re node") repl:cljs-browser (shell "rlwrap bb clojure -M:test:cljs -m cljs.main") quickdoc {:extra-deps {io.github.borkdude/quickdoc {:git/tag "v0.2.5", :git/sha "25784ca"}} :requires ([quickdoc.api :as api]) :task (api/quickdoc {:source-paths ["src/clj" "src/cljs" "src/cljs"] :git/branch "master" :github/repo "https://github.com/clj-commons/hickory"})}} } ================================================ FILE: deps.edn ================================================ {:paths ["src/clj" "src/cljc" "src/cljs"] :deps {org.clojure/clojure {:mvn/version "1.11.1"} org.jsoup/jsoup {:mvn/version "1.22.2"}} :aliases {:test {:extra-paths ["test/cljc"]} :cljs {:extra-deps {org.clojure/clojurescript {:mvn/version "1.11.60"}}} :cljs-test-runner {:extra-deps {olical/cljs-test-runner {:mvn/version "3.8.0"} viebel/codox-klipse-theme {:mvn/version "0.0.1"}} :extra-paths ["cljs-test-runner-out"] :main-opts ["-m" "cljs-test-runner.main" "--env" "chrome-headless" "--namespace-regex" "hickory.test.*"]}}} ================================================ FILE: package.json ================================================ { "devDependencies": { "karma": "^6.4.1", "karma-chrome-launcher": "^3.1.1", "karma-cli": "^2.0.0", "karma-cljs-test": "^0.1.0" } } ================================================ FILE: project.clj ================================================ (defproject org.clj-commons/hickory (or (System/getenv "PROJECT_VERSION") "0.7.1") :description "HTML as Data" :url "https://github.com/clj-commons/hickory" :license {:name "Eclipse Public License" :url "http://www.eclipse.org/legal/epl-v10.html"} :deploy-repositories [["clojars" {:url "https://repo.clojars.org" :username :env/clojars_username :password :env/clojars_org_clj_commons_password :sign-releases true}]] :dependencies [[org.clojure/clojure "1.11.1"] [org.clojure/clojurescript "1.11.60" :scope "provided"] [org.jsoup/jsoup "1.22.2"]] :source-paths ["src/clj" "src/cljc" "src/cljs"] :profiles {:dev {:source-paths ["src/clj" "src/cljc"]} :test {:source-paths ["src/cljs" "src/cljc" "test/cljc" "test/cljs"]}}) ================================================ FILE: src/clj/hickory/core.clj ================================================ (ns hickory.core (:require [hickory.utils :as utils] [hickory.zip :as hzip] [clojure.zip :as zip]) (:import [org.jsoup Jsoup] [org.jsoup.nodes Attribute Attributes Comment DataNode Document DocumentType Element TextNode XmlDeclaration] [org.jsoup.parser Tag Parser])) (set! *warn-on-reflection* true) (defn- end-or-recur [as-fn loc data & [skip-child?]] (let [new-loc (-> loc (zip/replace data) zip/next (cond-> skip-child? zip/next))] (if (zip/end? new-loc) (zip/root new-loc) #(as-fn (zip/node new-loc) new-loc)))) ;; ;; Protocols ;; (defprotocol HiccupRepresentable "Objects that can be represented as Hiccup nodes implement this protocol in order to make the conversion." (as-hiccup [this] [this zip-loc] "Converts the node given into a hiccup-format data structure. The node must have an implementation of the HiccupRepresentable protocol; nodes created by parse or parse-fragment already do.")) (defprotocol HickoryRepresentable "Objects that can be represented as HTML DOM node maps, similar to clojure.xml, implement this protocol to make the conversion. Each DOM node will be a map or string (for Text/CDATASections). Nodes that are maps have the appropriate subset of the keys :type - [:comment, :document, :document-type, :element] :tag - node's tag, check :type to see if applicable :attrs - node's attributes as a map, check :type to see if applicable :content - node's child nodes, in a vector, check :type to see if applicable" (as-hickory [this] [this zip-loc] "Converts the node given into a hickory-format data structure. The node must have an implementation of the HickoryRepresentable protocol; nodes created by parse or parse-fragment already do.")) (extend-protocol HiccupRepresentable Attribute ;; Note the attribute value is not html-escaped; see comment for Element. (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this _] [(utils/lower-case-keyword (.getKey this)) (.getValue this)])) Attributes (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this _] (into {} (map as-hiccup this)))) Comment (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] (end-or-recur as-hiccup loc (str "")))) DataNode (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] (end-or-recur as-hiccup loc (str this)))) Document (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] (end-or-recur as-hiccup loc (apply list (.childNodes this))))) DocumentType (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] (end-or-recur as-hiccup loc (utils/render-doctype (.name this) (.publicId this) (.systemId this))))) Element (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] ;; There is an issue with the hiccup format, which is that it ;; can't quite cover all the pieces of HTML, so anything it ;; doesn't cover is thrown into a string containing the raw ;; HTML. This presents a problem because it is then never the case ;; that a string in a hiccup form should be html-escaped (except ;; in an attribute value) when rendering; it should already have ;; any escaping. Since the HTML parser quite properly un-escapes ;; HTML where it should, we have to go back and un-un-escape it ;; wherever text would have been un-escaped. We do this by ;; html-escaping the parsed contents of text nodes, and not ;; html-escaping comments, data-nodes, and the contents of ;; unescapable nodes. (let [tag (utils/lower-case-keyword (.tagName this)) children (cond->> (.childNodes this) (utils/unescapable-content tag) (map str)) data (into [] (concat [tag (trampoline as-hiccup (.attributes this))] children))] (end-or-recur as-hiccup loc data (utils/unescapable-content tag))))) TextNode ;; See comment for Element re: html escaping. (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] (end-or-recur as-hiccup loc (utils/html-escape (.getWholeText this))))) XmlDeclaration (as-hiccup ([this] (trampoline as-hiccup this (hzip/hiccup-zip this))) ([this loc] (end-or-recur as-hiccup loc (str this))))) (extend-protocol HickoryRepresentable Attribute (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this _] [(utils/lower-case-keyword (.getKey this)) (.getValue this)])) Attributes (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this _] (not-empty (into {} (map as-hickory this))))) Comment (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this loc] (end-or-recur as-hickory loc {:type :comment :content [(.getData this)]} true))) DataNode (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this loc] (end-or-recur as-hickory loc (str this)))) Document (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this loc] (end-or-recur as-hickory loc {:type :document :content (or (seq (.childNodes this)) nil)}))) DocumentType (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this loc] (end-or-recur as-hickory loc {:type :document-type :attrs (trampoline as-hickory (.attributes this))}))) Element (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this loc] (end-or-recur as-hickory loc {:type :element :attrs (trampoline as-hickory (.attributes this)) :tag (utils/lower-case-keyword (.tagName this)) :content (or (seq (.childNodes this)) nil)}))) TextNode (as-hickory ([this] (trampoline as-hickory this (hzip/hickory-zip this))) ([this loc] (end-or-recur as-hickory loc (.getWholeText this))))) (defn parse "Parse an entire HTML document into a DOM structure that can be used as input to as-hiccup or as-hickory." [s] (cond (instance? String s) (Jsoup/parse ^String s) (instance? java.io.File s) (Jsoup/parse ^java.io.File s) (instance? java.nio.file.Path s) (Jsoup/parse ^java.nio.file.Path s) :else (throw (ex-info "Invalid input for parse" {:type (type s)})))) (defn parse-fragment "Parse an HTML fragment (some group of tags that might be at home somewhere in the tag hierarchy under ) into a list of DOM elements that can each be passed as input to as-hiccup or as-hickory." [s] (into [] (Parser/parseFragment s (Element. (Tag/valueOf "body") "") ""))) ================================================ FILE: src/cljc/hickory/convert.cljc ================================================ (ns hickory.convert "Functions to convert from one representation to another." (:require [hickory.render :as render] [hickory.core :as core] [hickory.utils :as utils])) (defn hiccup-to-hickory "Given a sequence of hiccup forms representing a full document, returns an equivalent hickory node representation of that document. This will perform HTML5 parsing as a full document, no matter what it is given. Note that this function is heavyweight: it requires a full HTML re-parse to work." [hiccup-forms] (core/as-hickory (core/parse (render/hiccup-to-html hiccup-forms)))) (defn hiccup-fragment-to-hickory "Given a sequence of hiccup forms representing a document fragment, returns an equivalent sequence of hickory fragments. Note that this function is heavyweight: it requires a full HTML re-parse to work." [hiccup-forms] (map core/as-hickory (core/parse-fragment (render/hiccup-to-html hiccup-forms)))) (defn hickory-to-hiccup "Given a hickory format dom object, returns an equivalent hiccup representation. This can be done directly and exactly, but in general you will not be able to go back from the hiccup." [dom] (if (string? dom) (utils/html-escape dom) (case (:type dom) :document (mapv hickory-to-hiccup (:content dom)) :document-type (utils/render-doctype (get-in dom [:attrs :name]) (get-in dom [:attrs :publicid]) (get-in dom [:attrs :systemid])) :element (if (utils/unescapable-content (:tag dom)) (if (every? string? (:content dom)) ;; Merge :attrs contents with {} to prevent nil from getting into ;; the hiccup forms when there are no attributes. (apply vector (:tag dom) (into {} (:attrs dom)) (:content dom)) (throw (ex-info "An unescapable content tag had non-string children." {:error-location dom}))) (apply vector (:tag dom) (into {} (:attrs dom)) (map hickory-to-hiccup (:content dom)))) :comment (str "")))) ================================================ FILE: src/cljc/hickory/hiccup_utils.cljc ================================================ (ns hickory.hiccup-utils "Utilities for working with hiccup forms." (:require [clojure.string :as str])) (defn- first-idx "Given two possible indexes, returns the lesser that is not -1. If both are -1, then -1 is returned. Useful for searching strings for multiple markers, as many routines will return -1 for not found. Examples: (first-idx -1 -1) => -1 (first-idx -1 2) => 2 (first-idx 5 -1) => 5 (first-idx 5 3) => 3" #?(:clj [^long a ^long b] :cljs [a b]) (if (== a -1) b (if (== b -1) a (min a b)))) (defn- index-of ([^String s c] #?(:clj (.indexOf s (int c)) :cljs (.indexOf s c))) ([^String s c idx] #?(:clj (.indexOf s (int c) (int idx)) :cljs (.indexOf s c idx)))) (defn- split-keep-trailing-empty "clojure.string/split is a wrapper on java.lang.String/split with the limit parameter equal to 0, which keeps leading empty strings, but discards trailing empty strings. This makes no sense, so we have to write our own to keep the trailing empty strings." [s re] (str/split s re -1)) (defn tag-well-formed? "Given a hiccup tag element, returns true iff the tag is in 'valid' hiccup format. Which in this function means: 1. Tag name is non-empty. 2. If there is an id, there is only one. 3. If there is an id, it is nonempty. 4. If there is an id, it comes before any classes. 5. Any class name is nonempty." [tag-elem] (let [tag-elem (name tag-elem) hash-idx (int (index-of tag-elem \#)) dot-idx (int (index-of tag-elem \.)) tag-cutoff (first-idx hash-idx dot-idx)] (and (< 0 (count tag-elem)) ;; 1. (if (== tag-cutoff -1) true (> tag-cutoff 0)) ;; 1. (if (== hash-idx -1) true (and (== -1 (index-of tag-elem \# (inc hash-idx))) ;; 2. (< (inc hash-idx) (first-idx (index-of tag-elem \. ;; 3. (inc hash-idx)) (count tag-elem))))) (if (and (not= hash-idx -1) (not= dot-idx -1)) ;; 4. (< hash-idx dot-idx) true) (if (== dot-idx -1) ;; 5. true (let [classes (.substring tag-elem (inc dot-idx))] (every? #(< 0 (count %)) (split-keep-trailing-empty classes #"\."))))))) (defn tag-name "Given a well-formed hiccup tag element, return just the tag name as a string." [tag-elem] (let [tag-elem (name tag-elem) hash-idx (int (index-of tag-elem \#)) dot-idx (int (index-of tag-elem \.)) cutoff (first-idx hash-idx dot-idx)] (if (== cutoff -1) ;; No classes or ids, so the entire tag-element is the name. tag-elem ;; There was a class or id, so the tag name ends at the first ;; of those. (.substring tag-elem 0 cutoff)))) (defn class-names "Given a well-formed hiccup tag element, return a vector containing any class names included in the tag, as strings. Ignores the hiccup requirement that any id on the tag must come first. Example: :div.foo.bar => [\"foo\" \"bar\"]." [tag-elem] (let [tag-elem (name tag-elem)] (loop [curr-dot (index-of tag-elem \.) classes (transient [])] (if (== curr-dot -1) ;; Didn't find another dot, so no more classes. (persistent! classes) ;; There's another dot, so there's another class. (let [next-dot (index-of tag-elem \. (inc curr-dot)) next-hash (index-of tag-elem \# (inc curr-dot)) cutoff (first-idx next-dot next-hash)] (if (== cutoff -1) ;; Rest of the tag element is the last class. (recur next-dot (conj! classes (.substring tag-elem (inc curr-dot)))) ;; The current class name is terminated by another element. (recur next-dot (conj! classes (.substring tag-elem (inc curr-dot) cutoff))))))))) (defn id "Given a well-formed hiccup tag element, return a string containing the id, or nil if there isn't one." [tag-elem] (let [tag-elem (name tag-elem) hash-idx (int (index-of tag-elem \#)) next-dot-idx (int (index-of tag-elem \. hash-idx))] (if (== hash-idx -1) nil (if (== next-dot-idx -1) (.substring tag-elem (inc hash-idx)) (.substring tag-elem (inc hash-idx) next-dot-idx))))) (defn- expand-content-seqs "Given a sequence of hiccup forms, presumably the content forms of another hiccup element, return a new sequence with any sequence elements expanded into the main sequence. This logic does not apply recursively, so sequences inside sequences won't be expanded out. Also note that this really only applies to sequences; things that seq? returns true on. So this excludes vectors. (expand-content-seqs [1 '(2 3) (for [x [1 2 3]] (* x 2)) [5]]) ==> (1 2 3 2 4 6 [5])" [content] (loop [remaining-content content result (transient [])] (if (nil? remaining-content) (persistent! result) (if (seq? (first remaining-content)) (recur (next remaining-content) ;; Fairly unhappy with this nested loop, but it seems ;; necessary to continue the handling of transient vector. (loop [remaining-seq (first remaining-content) result result] (if (nil? remaining-seq) result (recur (next remaining-seq) (conj! result (first remaining-seq)))))) (recur (next remaining-content) (conj! result (first remaining-content))))))) (defn- normalize-element "Given a well-formed hiccup form, ensure that it is in the form [tag attributes content1 ... contentN]. That is, an unadorned tag name (keyword, lowercase), all attributes in the attribute map in the second element, and then any children. Note that this does not happen recursively; content is not modified." [hiccup-form] (let [[tag-elem & content] hiccup-form] (when (not (tag-well-formed? tag-elem)) (throw (ex-info (str "Invalid input: Tag element" tag-elem "is not well-formed.") {}))) (let [tag-name (keyword (str/lower-case (tag-name tag-elem))) tag-classes (class-names tag-elem) tag-id (id tag-elem) tag-attrs {:id tag-id :class (if (not (empty? tag-classes)) (str/join " " tag-classes))} [map-attrs content] (if (map? (first content)) [(first content) (rest content)] [nil content]) ;; Note that we replace tag attributes with map attributes, without ;; merging them. This is to match hiccup's behavior. attrs (merge tag-attrs map-attrs)] (apply vector tag-name attrs content)))) (defn normalize-form "Given a well-formed hiccup form, recursively normalizes it, so that it and all children elements will also be normalized. A normalized form is in the form [tag attributes content1 ... contentN]. That is, an unadorned tag name (keyword, lowercase), all attributes in the attribute map in the second element, and then any children. Any content that is a sequence is also expanded out into the main sequence of content items." [form] (if (string? form) form ;; Do a pre-order walk and save the first two items, then do the children, ;; then glue them back together. (let [[tag attrs & contents] (normalize-element form)] (apply vector tag attrs (map #(if (vector? %) ;; Recurse only on vec children. (normalize-form %) %) (expand-content-seqs contents)))))) ================================================ FILE: src/cljc/hickory/render.cljc ================================================ (ns hickory.render (:require [hickory.hiccup-utils :as hu] [hickory.utils :as utils] [clojure.string :as str])) ;; ;; Hickory to HTML ;; (defn- render-hickory-attribute "Given a map entry m, representing the attribute name and value, returns a string representing that key/value pair as it would be rendered into HTML." [m] (str " " (name (key m)) "=\"" (utils/html-escape (val m)) "\"")) (defn hickory-to-html "Given a hickory HTML DOM map structure (as returned by as-hickory), returns a string containing HTML it represents. Keep in mind this function is not super fast or heavy-duty. Note that it will NOT in general be the case that (= my-html-src (hickory-to-html (as-hickory (parse my-html-src)))) as we do not keep any letter case or whitespace information, any \"tag-soupy\" elements, attribute quote characters used, etc." [dom] (if (string? dom) (utils/html-escape dom) (try (case (:type dom) :document (apply str (map hickory-to-html (:content dom))) :document-type (utils/render-doctype (get-in dom [:attrs :name]) (get-in dom [:attrs :publicid]) (get-in dom [:attrs :systemid])) :element (cond (utils/void-element (:tag dom)) (str "<" (name (:tag dom)) (apply str (map render-hickory-attribute (:attrs dom))) ">") (utils/unescapable-content (:tag dom)) (str "<" (name (:tag dom)) (apply str (map render-hickory-attribute (:attrs dom))) ">" (apply str (:content dom)) ;; Won't get html-escaped. "") :else (str "<" (name (:tag dom)) (apply str (map render-hickory-attribute (:attrs dom))) ">" (apply str (map hickory-to-html (:content dom))) "")) :comment (str "")) (catch #?(:clj IllegalArgumentException :cljs js/Error) e (throw (if (utils/starts-with #?(:clj (.getMessage e) :cljs (.-message e)) "No matching clause: ") (ex-info (str "Not a valid node: " (pr-str dom)) {:dom dom}) e)))))) ;; ;; Hiccup to HTML ;; (defn- render-hiccup-attrs "Given a hiccup attribute map, returns a string containing the attributes rendered as they should appear in an HTML tag, right after the tag (including a leading space to separate from the tag, if any attributes present)." [attrs] ;; Hiccup normally does not html-escape strings, but it does for attribute ;; values. (let [attrs-str (->> (for [[k v] attrs] (cond (true? v) (str (name k)) (nil? v) "" :else (str (name k) "=" "\"" (utils/html-escape v) "\""))) (filter #(not (empty? %))) sort (str/join " "))] (if (not (empty? attrs-str)) ;; If the attrs-str is not "", we need to pad the front so that the ;; tag will separate from the attributes. Otherwise, "" is fine to return. (str " " attrs-str) attrs-str))) (declare hiccup-to-html) (defn- render-hiccup-element "Given a normalized hiccup element (such as the output of hickory.hiccup-utils/normalize-form; see this function's docstring for more detailed definition of a normalized hiccup element), renders it to HTML and returns it as a string." [n-element] (let [[tag attrs & content] n-element] (if (utils/void-element tag) (str "<" (name tag) (render-hiccup-attrs attrs) ">") (str "<" (name tag) (render-hiccup-attrs attrs) ">" (hiccup-to-html content) "")))) (defn- render-hiccup-form "Given a normalized hiccup form (such as the output of hickory.hiccup-utils/normalize-form; see this function's docstring for more detailed definition of a normalized hiccup form), renders it to HTML and returns it as a string." [n-form] (if (vector? n-form) (render-hiccup-element n-form) n-form)) (defn hiccup-to-html "Given a sequence of hiccup forms (as returned by as-hiccup), returns a string containing HTML it represents. Keep in mind this function is not super fast or heavy-duty, and definitely not a replacement for dedicated hiccup renderers, like hiccup itself, which *is* fast and heavy-duty. ```klipse (hiccup-to-html '([:html {} [:head {}] [:body {} [:a {} \"foo\"]]])) ``` Note that it will NOT in general be the case that (= my-html-src (hiccup-to-html (as-hiccup (parse my-html-src)))) as we do not keep any letter case or whitespace information, any \"tag-soupy\" elements, attribute quote characters used, etc. It will also not generally be the case that this function's output will exactly match hiccup's. For instance: ```klipse (hiccup-to-html (as-hiccup (parse \"foo\"))) ``` " [hiccup-forms] (apply str (map #(render-hiccup-form (hu/normalize-form %)) hiccup-forms))) ================================================ FILE: src/cljc/hickory/select.cljc ================================================ (ns hickory.select "Functions to query hickory-format HTML data. See clojure.zip for more information on zippers, locs, nodes, next, etc." (:require [clojure.zip :as zip] [clojure.string :as string] [hickory.zip :as hzip]) #?(:clj (:import clojure.lang.IFn)) (:refer-clojure :exclude [and or not class])) ;; ;; Utilities ;; (defn until "Calls f on val until pred called on the result is true. If not, it repeats by calling f on the result, etc. The value that made pred return true is returned." [f val pred] (let [next-val (f val)] (if (pred next-val) next-val (recur f next-val pred)))) (defn count-until "Calls f on val until pred called on the result is true. If not, it repeats by calling f on the result, etc. The count of times this process was repeated until pred returned true is returned." [f val pred] (loop [next-val val cnt 0] (if (pred next-val) cnt (recur (f next-val) (inc cnt))))) (defn next-pred "Like clojure.zip/next, but moves until it reaches a node that returns true when the function in the pred argument is called on them, or reaches the end." [hzip-loc pred] (until zip/next hzip-loc #(clojure.core/or (zip/end? %) (pred %)))) (defn prev-pred "Like clojure.zip/prev, but moves until it reaches a node that returns true when the function in the pred argument is called on them, or reaches the beginning." [hzip-loc pred] (until zip/prev hzip-loc #(clojure.core/or (nil? %) (pred %)))) (defn left-pred "Like clojure.zip/left, but moves until it reaches a node that returns true when the function in the pred argument is called on them, or reaches the left boundary of the current group of siblings." [hzip-loc pred] (until zip/left hzip-loc #(clojure.core/or (nil? %) (pred %)))) (defn right-pred "Like clojure.zip/right, but moves until it reaches a node that returns true when the function in the pred argument is called on them, or reaches the right boundary of the current group of siblings." [hzip-loc pred] (until zip/right hzip-loc #(clojure.core/or (nil? %) (pred %)))) (defn up-pred "Like clojure.zip/up, but moves until it reaches a node that returns true when the function in the pred argument is called on them, or reaches the beginning." [hzip-loc pred] (until zip/up hzip-loc #(clojure.core/or (nil? %) (pred %)))) (defn next-of-node-type "Like clojure.zip/next, but only counts moves to nodes that have the given type." [hzip-loc node-type] (next-pred hzip-loc #(= node-type (:type (zip/node %))))) (defn prev-of-node-type "Like clojure.zip/prev, but only counts moves to nodes that have the given type." [hzip-loc node-type] (prev-pred hzip-loc #(= node-type (:type (zip/node %))))) (defn left-of-node-type "Like clojure.zip/left, but only counts moves to nodes that have the given type." [hzip-loc node-type] (left-pred hzip-loc #(= node-type (:type (zip/node %))))) (defn right-of-node-type "Like clojure.zip/right, but only counts moves to nodes that have the given type." [hzip-loc node-type] (right-pred hzip-loc #(= node-type (:type (zip/node %))))) (defn after-subtree "Given a zipper loc, returns the zipper loc that is the first one after the arg's subtree, if there is a subtree. If there is no loc after this loc's subtree, returns the end node." [zip-loc] (if (zip/end? zip-loc) zip-loc (clojure.core/or (zip/right zip-loc) (loop [curr-loc zip-loc] (if (zip/up curr-loc) (clojure.core/or (zip/right (zip/up curr-loc)) (recur (zip/up curr-loc))) [(zip/node curr-loc) :end]))))) ;; ;; Select ;; (defn select-next-loc "Given a selector function and a loc inside a hickory zip data structure, returns the next zipper loc that satisfies the selection function. This can be the loc that is passed in, so be sure to move to the next loc if you want to use this function to exhaustively search through a tree manually. Note that if there is no next node that satisfies the selection function, nil is returned. The third argument, if present, must be a function of one argument that is called on a zipper loc to return the next loc to consider in the search. By default, this argument is zip/next. The fourth argument, if present, must be a function of one argument that is called on a zipper loc to determine if the end of the search has been reached (true return value). When the fourth argument returns true on a loc, that loc is not considered in the search and the search finishes with a nil return. By default, the fourth argument is zip/end?." ([selector-fn hzip-loc] (select-next-loc selector-fn hzip-loc zip/next)) ([selector-fn hzip-loc next-fn] (select-next-loc selector-fn hzip-loc next-fn zip/end?)) ([selector-fn hzip-loc next-fn end?-fn] (loop [loc hzip-loc] (if (end?-fn loc) nil (if (selector-fn loc) loc (recur (next-fn loc))))))) (defn select-locs "Given a selector function and a hickory data structure, returns a vector containing all of the zipper locs selected by the selector function." [selector-fn hickory-tree] (loop [loc (select-next-loc selector-fn (hzip/hickory-zip hickory-tree)) selected-nodes (transient [])] (if (nil? loc) (persistent! selected-nodes) (recur (select-next-loc selector-fn (zip/next loc)) (conj! selected-nodes loc))))) (defn select "Given a selector function and a hickory data structure, returns a vector containing all of the hickory nodes selected by the selector function." [selector-fn hickory-tree] (mapv zip/node (select-locs selector-fn hickory-tree))) ;; ;; Selectors ;; ;; Mostly based off the spec at http://www.w3.org/TR/selectors/#selectors ;; Some selectors are simply not possible outside a browser (active, ;; visited, etc). ;; (defn node-type "Return a function that takes a zip-loc argument and returns the zip-loc passed in iff it has the given node type. The type argument can be a String or Named (keyword, symbol). The node type comparison is done case-insensitively." [type] (fn [hzip-loc] (let [node (zip/node hzip-loc) node-type (-> node :type)] (if (clojure.core/and node-type (= (string/lower-case (name node-type)) (string/lower-case (name type)))) hzip-loc)))) (defn tag "Return a function that takes a zip-loc argument and returns the zip-loc passed in iff it has the given tag. The tag argument can be a String or Named (keyword, symbol). The tag name comparison is done case-insensitively." [tag] (fn [hzip-loc] (let [node (zip/node hzip-loc) node-tag (-> node :tag)] (if (clojure.core/and node-tag (= (string/lower-case (name node-tag)) (string/lower-case (name tag)))) hzip-loc)))) (defn attr "Returns a function that takes a zip-loc argument and returns the zip-loc passed in iff it has the given attribute, and that attribute optionally satisfies a predicate given as an additional argument. With a single argument, the attribute name (a string, keyword, or symbol), the function returned will return the zip-loc if that attribute is present (and has any value) on the zip-loc's node. The attribute name will be compared case-insensitively, but the attribute value (if present), will be passed as-is to the predicate. If the predicate argument is given, it will only return the zip-loc if that predicate is satisfied when given the attribute's value as its only argument. Note that the predicate only gets called when the attribute is present, so it can assume its argument is not nil." ([attr-name] ;; Since we want this call to succeed in any case where this attr ;; is present, we pass in a function that always returns true. (attr attr-name (fn [_] true))) ([attr-name predicate] ;; Note that attribute names are normalized to lowercase by ;; jsoup, as an html5 parser should; see here: ;; http://www.whatwg.org/specs/web-apps/current-work/#attribute-name-state (fn [hzip-loc] (let [node (zip/node hzip-loc) attr-key (keyword (string/lower-case (name attr-name)))] ;; If the attribute does not exist, we'll definitely return null. ;; Otherwise, we'll ask the predicate if we should return hzip-loc. (if (clojure.core/and (contains? (:attrs node) attr-key) (predicate (get-in node [:attrs attr-key]))) hzip-loc))))) (defn id "Returns a function that takes a zip-loc argument and returns the zip-loc passed in iff it has the given id. The id argument can be a String or Named (keyword, symbol). The id name comparison is done case-insensitively." [id] (attr :id #(= (string/lower-case %) (string/lower-case (name id))))) (defn class "Returns a function that takes a zip-loc argument and returns the zip-loc passed in iff it has the given class. The class argument can be a String or Named (keyword, symbol). The class name comparison is done case-insensitively." [class-name] (letfn [(parse-classes [class-str] (into #{} (mapv string/lower-case (string/split class-str #"\s+"))))] (attr :class #(contains? (parse-classes %) (string/lower-case (name class-name)))))) (defn any "This selector takes no args, it simply is the selector function. It returns true on any element it is called on; corresponds to the CSS '*' selector." [hzip-loc] (if (= :element (-> (zip/node hzip-loc) :type)) hzip-loc)) (def element "Another name for the any selector, to express that it can be used to only select elements." any) (defn element-child "This selector takes no args, it simply is the selector function. It returns the zip-loc passed in iff that loc is an element, and it has a parent that is also an element." [hzip-loc] (let [possible-parent (zip/up hzip-loc)] (clojure.core/and (element hzip-loc) ;; Check that we are not at the top already first. possible-parent (element possible-parent)))) (defn root "This selector takes no args, it simply is the selector function. It returns the zip-loc of the root node (the HTML element)." [hzip-loc] (if (= :html (-> (zip/node hzip-loc) :tag)) hzip-loc)) (defn find-in-text "Returns a function that takes a zip-loc argument and returns the zip-loc passed in iff it has some text node in its contents that matches the regular expression. Note that this only applies to the direct text content of a node; nodes which have the given text in one of their child nodes will not be selected." [re] (fn [hzip-loc] (when (some #(re-find re %) (->> (zip/node hzip-loc) :content (filter string?))) hzip-loc))) (defn n-moves-until "This selector returns a selector function that selects its argument if that argument is some \"distance\" from a \"boundary.\" This is an abstract way of phrasing it, but it captures the full generality. The selector this function returns will apply the move argument to its own output, beginning with its zipper loc argument, until the term-pred argument called on its output returns true. At that point, the number of times the move function was called successfully is compared to kn+c; if there exists some value of k such that the two quantities are equal, then the selector will return the argument zipper loc successfully. For example, (n-moves-until 2 1 clojure.zip/left nil?) will return a selector that calls zip/left on its own output, beginning with the argument zipper loc, until its return value is nil (nil? returns true). Suppose it called left 5 times before zip/left returned nil. Then the selector will return with success, since 2k+1 = 5 for k = 2. Most nth-child-* selectors in this package use n-moves-until in their implementation." [n c move term-pred] (fn [hzip-loc] (let [distance (count-until move hzip-loc term-pred)] (if (== 0 n) ;; No stride, so distance must = c to select. (if (== distance c) hzip-loc) ;; There's a stride, so need to subtract c and see if the ;; remaining distance is a multiple of n. (if (== 0 (rem (- distance c) n)) hzip-loc))))) (defn nth-of-type "Returns a function that returns true if the node is the nth child of its parent (and it has a parent) of the given tag type. First element is 1, last is n." ([c typ] (cond (= :odd c) (nth-of-type 2 1 typ) (= :even c) (nth-of-type 2 0 typ) :else (nth-of-type 0 c typ))) ([n c typ] (fn [hzip-loc] ;; We're only interested in elements whose parents are also elements, ;; so check this up front and maybe save some work. (if (clojure.core/and (element-child hzip-loc) (= typ (:tag (zip/node hzip-loc)))) (let [sel (n-moves-until n c #(left-pred % (fn [x] (-> (zip/node x) :tag (= typ)))) nil?)] (sel hzip-loc)))))) (defn nth-last-of-type "Returns a function that returns true if the node is the nth last child of its parent (and it has a parent) of the given tag type. First element is 1, last is n." ([c typ] (cond (= :odd c) (nth-last-of-type 2 1 typ) (= :even c) (nth-last-of-type 2 0 typ) :else (nth-last-of-type 0 c typ))) ([n c typ] (fn [hzip-loc] ;; We're only interested in elements whose parents are also elements, ;; so check this up front and maybe save some work. (if (clojure.core/and (element-child hzip-loc) (= typ (:tag (zip/node hzip-loc)))) (let [sel (n-moves-until n c #(right-pred % (fn [x] (-> (zip/node x) :tag (= typ)))) nil?)] (sel hzip-loc)))))) (defn nth-child "Returns a function that returns true if the node is the nth child of its parent (and it has a parent). First element is 1, last is n." ([c] (cond (= :odd c) (nth-child 2 1) (= :even c) (nth-child 2 0) :else (nth-child 0 c))) ([n c] (fn [hzip-loc] ;; We're only interested in elements whose parents are also elements, ;; so check this up front and maybe save some work. (if (element-child hzip-loc) (let [sel (n-moves-until n c #(left-of-node-type % :element) nil?)] (sel hzip-loc)))))) (defn nth-last-child "Returns a function that returns true if the node has n siblings after it, and has a parent." ([c] (cond (= :odd c) (nth-last-child 2 1) (= :even c) (nth-last-child 2 0) :else (nth-last-child 0 c))) ([n c] (fn [hzip-loc] ;; We're only interested in elements whose parents are also elements, ;; so check this up front and maybe save some work. (if (element-child hzip-loc) (let [sel (n-moves-until n c #(right-of-node-type % :element) nil?)] (sel hzip-loc)))))) (defn first-child "This selector takes no args, it is simply the selector. Returns true if the node is the first child of its parent (and it has a parent)." [hzip-loc] (clojure.core/and (element-child hzip-loc) ((nth-child 1) hzip-loc))) (defn last-child "This selector takes no args, it is simply the selector. Returns true if the node is the last child of its parent (and it has a parent." [hzip-loc] (clojure.core/and (element-child hzip-loc) ((nth-last-child 1) hzip-loc))) ;; ;; Selector combinators ;; (defn and "Takes any number of selectors and returns a selector that is true if all of the argument selectors are true." [& selectors] (fn [zip-loc] (if (every? #(% zip-loc) selectors) zip-loc))) (defn or "Takes any number of selectors and returns a selector that is true if any of the argument selectors are true." [& selectors] (fn [zip-loc] (if (some #(% zip-loc) selectors) zip-loc))) (defn not "Takes a selector argument and returns a selector that is true if the underlying selector is false on its argument, and vice versa." [selector] (fn [hzip-loc] (if (clojure.core/not (selector hzip-loc)) hzip-loc))) (defn el-not "Takes a selector argument and returns a selector that is true if the underlying selector is false on its argument and vice versa, and additionally that argument is an element node. Compared to the 'not' selector, this corresponds more closely to the CSS equivalent, which will only ever select elements." [selector] (and (node-type :element) (not selector))) (defn compose-unary "Takes a unary selection function and any number of selectors and returns a selector which returns true when each selector and the unary function applied to each subsequenct selector returns true. Example: (compose-unary has-child (tag :div) (class :foo) (attr :disabled)) Produces the equivalent of: (and (tag :div) (has-child (and (class :foo) (has-child (and (attr :disabled))))))" [unary-selector-fn & selectors] (let [rev (reverse selectors)] (loop [selectors (rest rev) output (and (first rev))] (cond (empty? selectors) output (= (count selectors) 1) (and (first selectors) (unary-selector-fn output)) :else (recur (rest selectors) (and (first selectors) (unary-selector-fn output))))))) (defn ordered-adjacent "Takes a zipper movement function and any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is satisfied by the first selector, and the zip-loc arrived at by applying the move-fn argument is satisfied by the second selector, and so on for all the selectors given as arguments. If the move-fn moves to nil before the full selector list is satisfied, the entire selector fails, but note that success is checked before a move to nil is checked, so satisfying the last selector with the last node you can move to succeeds." [move-fn & selectors] ;; We'll work backwards through the selector list with an index. First we'll ;; build the selector list into an array for quicker access. We'll do it ;; immediately and then closure-capture the result, so it does not get ;; redone every time the selector is called. (let [selectors (into-array IFn selectors)] (fn [hzip-loc] (loop [curr-loc hzip-loc idx 0] (cond (>= idx (count selectors)) hzip-loc ;; Got to end satisfying selectors, return the loc. (nil? curr-loc) nil ;; Ran off a boundary before satisfying selectors, return nil. :else (if-let [next-loc ((nth selectors idx) curr-loc)] (recur (move-fn next-loc) (inc idx)))))))) (defn child "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the end of a chain of direct child relationships specified by the selectors given as arguments. Example: (child (tag :div) (class :foo) (attr :disabled)) will select the input in

but not in
" [& selectors] (apply ordered-adjacent zip/up (reverse selectors))) (defn has-child "Takes a selector as argument and returns a selector that returns true when some direct child node of the zip-loc given as the argument satisfies the selector. Example: (has-child (tag :div)) will select only the inner span in
" [selector] (fn [hzip-loc] (let [subtree-start-loc (-> hzip-loc zip/down) has-children? (not= nil subtree-start-loc)] ;; has-children? is needed to guard against zip/* receiving a nil arg in ;; a selector. (if has-children? (if (select-next-loc selector subtree-start-loc zip/right #(nil? %)) hzip-loc))))) (defn parent "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the start of a chain of direct child relationships specified by the selectors given as arguments. Example: (parent (tag :div) (class :foo) (attr :disabled)) will select the div in
but not in
" [& selectors] (apply compose-unary has-child selectors)) (defn follow-adjacent "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the end of a chain of direct element sibling relationships specified by the selectors given as arguments. Example: (follow-adjacent (tag :div) (class :foo)) will select the span in
...
... but not in
...
......" [& selectors] (apply ordered-adjacent #(left-of-node-type % :element) (reverse selectors))) (defn precede-adjacent "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the beginning of a chain of direct element sibling relationships specified by the selectors given as arguments. Example: (precede-adjacent (tag :div) (class :foo)) will select the div in
...
... but not in
...
......" [& selectors] (apply ordered-adjacent #(right-of-node-type % :element) selectors)) (defn ordered "Takes a zipper movement function and any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is satisfied by the first selector, and some zip-loc arrived at by applying the move-fn argument *one or more times* is satisfied by the second selector, and so on for all the selectors given as arguments. If the move-fn moves to nil before a the full selector list is satisfied, the entire selector fails, but note that success is checked before a move to nil is checked, so satisfying the last selector with the last node you can move to succeeds." [move-fn & selectors] ;; This function is a lot like ordered-adjacent, above, but: ;; 1) failing to fulfill a selector does not stop us moving along the tree ;; 2) therefore, we need to make sure the first selector matches the loc under ;; consideration, and not merely one that is farther along the movement ;; direction. (let [selectors (into-array IFn selectors)] (fn [hzip-loc] ;; First need to check that the first selector matches the current loc, ;; or else we can return nil immediately. (let [fst-selector (nth selectors 0)] (if (fst-selector hzip-loc) ;; First selector matches this node, so now check along the ;; movement direction for the rest of the selectors. (loop [curr-loc (move-fn hzip-loc) idx 1] (cond (>= idx (count selectors)) hzip-loc ;; Satisfied all selectors, so return the orig. loc. (nil? curr-loc) nil ;; Ran out of movements before selectors, return nil. :else (if ((nth selectors idx) curr-loc) (recur (move-fn curr-loc) (inc idx)) ;; Failed, so move but retry the same selector (recur (move-fn curr-loc) idx))))))))) (defn descendant "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the end of a chain of descendant relationships specified by the selectors given as arguments. To be clear, the node selected matches the final selector, but the previous selectors can match anywhere in the node's ancestry, provided they match in the order they are given as arguments, from top to bottom. Example: (descendant (tag :div) (class :foo) (attr :disabled)) will select the input in both
and
" [& selectors] (apply ordered zip/up (reverse selectors))) (defn has-descendant "Takes a selector as argument and returns a selector that returns true when some descendant node of the zip-loc given as the argument satisfies the selector. Be aware that because this selector must do a full sub-tree search on each node examined, it can have terrible performance. It's helpful if this is a late clause in an `and`, to prevent it from even attempting to match unless other criteria have been met first. Example: (has-descendant (tag :div)) will select the span and the outer div, but not the inner div, in
" [selector] (fn [hzip-loc] ;; Want to not count the current node, and stop after the last node ;; in the subtree of it has been checked, which is the next node ;; after the rightmost child. (let [subtree-start-loc (-> hzip-loc zip/down) has-children? (not= nil subtree-start-loc)] ;; has-children? is needed to guard against zip/* receiving a nil arg in ;; a selector. (if has-children? (let [subtree-end-loc (after-subtree hzip-loc)] (if (select-next-loc selector subtree-start-loc zip/next #(= % subtree-end-loc)) hzip-loc)))))) (defn ancestor "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the start of a chain of descendant relationships specified by the selectors given as arguments; intervening elements that do not satisfy a selector are simply ignored and do not prevent a match. Example: (ancestor (tag :div) (class :foo) (attr :disabled)) will select the div in both
and
" [& selectors] (apply compose-unary has-descendant selectors)) (defn follow "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the end of a chain of element sibling relationships specified by the selectors given as arguments; intervening elements that do not satisfy a selector are simply ignored and do not prevent a match. Example: (follow (tag :div) (class :foo)) will select the span in both
...
... and
...
......" [& selectors] (apply ordered #(left-of-node-type % :element) (reverse selectors))) (defn precede "Takes any number of selectors as arguments and returns a selector that returns true when the zip-loc given as the argument is at the beginning of a chain of element sibling relationships specified by the selectors given as arguments; intervening elements that do not satisfy a selector are simply ignored and do not prevent a match. Example: (precede (tag :div) (class :foo)) will select the div in both
...
... and
...
......" [& selectors] (apply ordered #(right-of-node-type % :element) selectors)) ================================================ FILE: src/cljc/hickory/utils.cljc ================================================ (ns hickory.utils "Miscellaneous utilities used internally." (:require [clojure.string :as string] #?(:cljs [goog.string :as gstring]))) ;; ;; Data ;; (def void-element "Elements that don't have a meaningful form." #{:area :base :br :col :command :embed :hr :img :input :keygen :link :meta :param :source :track :wbr}) (def unescapable-content "Elements whose content should never have html-escape codes." #{:script :style}) ;; ;; String utils ;; (defn clj-html-escape-without-quoin "Actually copy pasted from quoin: https://github.com/davidsantiago/quoin/blob/develop/src/quoin/text.clj" [^String s] ;; This method is "Java in Clojure" for serious speedups. #?(:clj (let [sb (StringBuilder.) slength (long (count s))] (loop [idx (long 0)] (if (>= idx slength) (.toString sb) (let [c (char (.charAt s idx))] (case c \& (.append sb "&") \< (.append sb "<") \> (.append sb ">") \" (.append sb """) (.append sb c)) (recur (inc idx)))))) ;; This shouldn't be called directly in cljs, but if it is, we use the same implementation as the html-escape function :cljs (gstring/htmlEscape s))) (defn html-escape [s] #?(:clj (clj-html-escape-without-quoin s) :cljs (gstring/htmlEscape s))) (defn starts-with [^String s ^String prefix] #?(:clj (.startsWith s prefix) :cljs (goog.string.startsWith s prefix))) (defn lower-case-keyword "Converts its string argument into a lowercase keyword." [s] (-> s string/lower-case keyword)) (defn render-doctype "Returns a string containing the HTML source for the doctype with given args. The second and third arguments can be nil or empty strings." [name publicid systemid] (str "")) ================================================ FILE: src/cljc/hickory/zip.cljc ================================================ (ns hickory.zip (:require [clojure.zip :as zip])) ;; ;; Hickory ;; (defn hickory-zip "Returns a zipper for html dom maps (as from as-hickory), given a root element." [root] (zip/zipper (complement string?) (comp seq :content) (fn [node children] (assoc node :content (and children (apply vector children)))) root)) ;; ;; Hiccup ;; ;; Just to make things easier, we go ahead and do the work here to ;; make hiccup zippers work on both normalized (all items have tag, ;; attrs map, and any children) and unnormalized hiccup forms. (defn- children "Takes a hiccup node (normalized or not) and returns its children nodes." [node] (if (vector? node) ;; It's a hiccup node vector. (if (map? (second node)) ;; There is an attr map in second slot. (seq (subvec node 2)) ;; So skip tag and attr vec. (seq (subvec node 1))) ;; Otherwise, just skip tag. ;; Otherwise, must have a been a node list node)) ;; Note, it's not made clear at all in the docs for clojure.zip, but as far as ;; I can tell, you are given a node potentially with existing children and ;; the sequence of children that should totally replace the existing children. (defn- make "Takes a hiccup node (normalized or not) and a sequence of children nodes, and returns a new node that has the the children argument as its children." [node children] ;; The node might be either a vector (hiccup form) or a seq (which is like a ;; node-list). (if (vector? node) (if (map? (second node)) ;; Again, check for normalized vec. (into (subvec node 0 2) children) ;; Attach children after tag&attrs. (apply vector (first node) children)) ;; Otherwise, attach after tag. children)) ;; We were given a list for node, so just return the new list. (defn hiccup-zip "Returns a zipper for Hiccup forms, given a root form." [root] (zip/zipper sequential? children make root)) ================================================ FILE: src/cljs/hickory/core.cljs ================================================ (ns hickory.core (:require [hickory.utils :as utils] [clojure.zip :as zip] [goog.string :as gstring] [goog.dom] [goog.dom.NodeType])) ;; ;; Protocols ;; (defprotocol HiccupRepresentable "Objects that can be represented as Hiccup nodes implement this protocol in order to make the conversion." (as-hiccup [this] "Converts the node given into a hiccup-format data structure. The node must have an implementation of the HiccupRepresentable protocol; nodes created by parse or parse-fragment already do.")) (defprotocol HickoryRepresentable "Objects that can be represented as HTML DOM node maps, similar to clojure.xml, implement this protocol to make the conversion. Each DOM node will be a map or string (for Text/CDATASections). Nodes that are maps have the appropriate subset of the keys :type - [:comment, :document, :document-type, :element] :tag - node's tag, check :type to see if applicable :attrs - node's attributes as a map, check :type to see if applicable :content - node's child nodes, in a vector, check :type to see if applicable" (as-hickory [this] "Converts the node given into a hickory-format data structure. The node must have an implementation of the HickoryRepresentable protocol; nodes created by parse or parse-fragment already do.")) (defn node-type [type] (case type "ELEMENT" 1 "ATTRIBUTE" 2 "TEXT" 3 "CDATA_SECTION" 4 "ENTITY_REFERENCE" 5 "ENTITY" 6 "PROCESSING_INSTRUCTION" 7 "COMMENT" 8 "DOCUMENT" 9 "DOCUMENT_TYPE" 10 "DOCUMENT_FRAGMENT" 11 "NOTATION" 12)) (def Attribute (node-type "ATTRIBUTE")) (def Comment (node-type "COMMENT")) (def Document (node-type "DOCUMENT")) (def DocumentType (node-type "DOCUMENT_TYPE")) (def Element (node-type "ELEMENT")) (def Text (node-type "TEXT")) (defn- as-seq [nodelist] (if (seq? nodelist) nodelist (array-seq nodelist))) (defn format-doctype [dt] (let [name (.-name dt) publicId (.-publicId dt) systemId (.-systemId dt)] (if (seq publicId) (gstring/format "" name publicId systemId) (str "")))) (extend-protocol HiccupRepresentable object (as-hiccup [this] (condp = (.-nodeType this) Attribute [(utils/lower-case-keyword (.-name this)) (.-value this)] Comment (str "") Document (map as-hiccup (as-seq (.-childNodes this))) DocumentType (format-doctype this) ;; There is an issue with the hiccup format, which is that it ;; can't quite cover all the pieces of HTML, so anything it ;; doesn't cover is thrown into a string containing the raw ;; HTML. This presents a problem because it is then never the case ;; that a string in a hiccup form should be html-escaped (except ;; in an attribute value) when rendering; it should already have ;; any escaping. Since the HTML parser quite properly un-escapes ;; HTML where it should, we have to go back and un-un-escape it ;; wherever text would have been un-escaped. We do this by ;; html-escaping the parsed contents of text nodes, and not ;; html-escaping comments, data-nodes, and the contents of ;; unescapable nodes. Element (let [tag (utils/lower-case-keyword (.-tagName this))] (into [] (concat [tag (into {} (map as-hiccup (as-seq (.-attributes this))))] (if (utils/unescapable-content tag) (map goog.dom.getRawTextContent (as-seq (.-childNodes this))) (map as-hiccup (as-seq (.-childNodes this))))))) Text (utils/html-escape (goog.dom.getRawTextContent this))))) (extend-protocol HickoryRepresentable object (as-hickory [this] (condp = (.-nodeType this) Attribute [(utils/lower-case-keyword (.-name this)) (.-value this)] Comment {:type :comment :content [(.-data this)]} Document {:type :document :content (not-empty (into [] (map as-hickory (as-seq (.-childNodes this)))))} DocumentType {:type :document-type :attrs {:name (.-name this) :publicid (.-publicId this) :systemid (.-systemId this)}} Element {:type :element :attrs (not-empty (into {} (map as-hickory (as-seq (.-attributes this))))) :tag (utils/lower-case-keyword (.-tagName this)) :content (not-empty (into [] (map as-hickory (as-seq (.-childNodes this)))))} Text (goog.dom.getRawTextContent this)))) (defn extract-doctype [s] ;;Starting HTML5 doctype definition can be uppercase (when-let [doctype (second (or (re-find #"]*)>" s) (re-find #"]*)>" s)))] (re-find #"([^\s]*)(\s+PUBLIC\s+[\"]?([^\"]*)[\"]?\s+[\"]?([^\"]*)[\"]?)?" doctype))) (defn remove-el [el] (.removeChild (.-parentNode el) el)) (defn parse-dom-with-domparser [s] (when (exists? js/DOMParser) (.parseFromString (js/DOMParser.) s "text/html"))) (defn parse-dom-with-write "Parse an HTML document (or fragment) as a DOM using document.implementation.createHTMLDocument and document.write." [s] ;;See http://www.w3.org/TR/domcore/#dom-domimplementation-createhtmldocument for more details. (let [doc (.createHTMLDocument js/document.implementation "") ;;empty title for older implementation doctype-el (.-doctype doc)] (when-not (extract-doctype s);; Remove default doctype if parsed string does not define it. (remove-el doctype-el)) (when-let [title-el (-> doc .-head .-firstChild)];; Remove default title if parsed string does not define it. (when (empty? (.-text title-el)) (remove-el title-el))) (.write doc s) doc)) (defn parse "Parse an entire HTML document into a DOM structure that can be used as input to as-hiccup or as-hickory. ```klipse (-> (parse \"foo

Hello

\") as-hiccup) ``` ```klipse (-> (parse \"foo

Hello

\") as-hickory) ``` " [s] (or (parse-dom-with-domparser s) (parse-dom-with-write s))) (defn parse-fragment "Parse an HTML fragment (some group of tags that might be at home somewhere in the tag hierarchy under ) into a list of DOM elements that can each be passed as input to as-hiccup or as-hickory." [s] (as-seq (-> (parse s) .-body .-childNodes))) ================================================ FILE: test/cljc/hickory/test/convert.cljc ================================================ (ns hickory.test.convert (:require [clojure.test :refer [deftest is]] [hickory.convert :refer [hiccup-fragment-to-hickory hiccup-to-hickory hickory-to-hiccup]] [hickory.core :refer [as-hiccup as-hickory parse parse-fragment]])) (deftest hiccup-to-hickory-test (is (= (as-hickory (parse "Hi.")) (hiccup-to-hickory (as-hiccup (parse "Hi."))))) (is (= (as-hickory (parse "OuterInner.")) (hiccup-to-hickory (as-hiccup (parse "OuterInner."))))) (is (= (as-hickory (parse "Hi")) (hiccup-to-hickory (as-hiccup (parse "Hi"))))) (is (= (as-hickory (parse "")) (hiccup-to-hickory (as-hiccup (parse "")))))) (deftest hiccup-fragment-to-hickory-test (is (= (map as-hickory (parse-fragment "")) (hiccup-fragment-to-hickory (map as-hiccup (parse-fragment ""))))) (let [src "It's an a."] (is (= (map as-hickory (parse-fragment src)) (hiccup-fragment-to-hickory (map as-hiccup (parse-fragment src))))))) (deftest hickory-to-hiccup-test (is (= (as-hiccup (parse "Hi.")) (hickory-to-hiccup (as-hickory (parse "Hi."))))) (is (= (as-hiccup (parse "OuterInner.")) (hickory-to-hiccup (as-hickory (parse "OuterInner."))))) (is (= (as-hiccup (parse "Hi")) (hickory-to-hiccup (as-hickory (parse "Hi"))))) (is (= (as-hiccup (parse "")) (hickory-to-hiccup (as-hickory (parse ""))))) ;; Fragments (is (= (map as-hiccup (parse-fragment "")) (map hickory-to-hiccup (map as-hickory (parse-fragment ""))))) (let [src "It's an a."] (is (= (map as-hiccup (parse-fragment src)) (map hickory-to-hiccup (map as-hickory (parse-fragment src))))))) ================================================ FILE: test/cljc/hickory/test/core.cljc ================================================ (ns hickory.test.core (:require [hickory.core :refer [as-hickory as-hiccup parse parse-fragment]] [clojure.test :refer [deftest is]])) ;; This document tests: doctypes, white space text nodes, attributes, ;; and cdata nodes. (deftest basic-documents (is (= ["" [:html {} [:head {}] [:body {} [:a {:href "foo"} "foo"] " " [:a {:id "so", :href "bar"} "bar"] [:script {:src "blah.js"} "alert(\"hi\");"]]]] (as-hiccup (parse "foo bar")))) (is (= {:type :document, :content [{:type :document-type, :attrs {:name "html", :publicid "", :systemid "" #?@(:clj [:#doctype "html"])}} {:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]} " " {:type :element, :attrs {:id "so", :href "bar"}, :tag :a, :content ["bar"]} {:type :element, :attrs {:src "blah.js"}, :tag :script, :content ["alert(\"hi\");"]}]}]}]} (as-hickory (parse "foo bar"))))) ;; This document tests: doctypes, comments, white space text nodes, attributes, ;; and cdata nodes. (deftest basic-documents2 (is (= ["" [:html {} [:head {}] [:body {} "" [:a {:href "foo"} "foo"] " " [:a {:id "so", :href "bar"} "bar"] [:script {:src "blah.js"} "alert(\"hi\");"]]]] (as-hiccup (parse "foo bar")))) (is (= {:type :document, :content [{:type :document-type, :attrs {:name "html", :publicid "", :systemid "" #?@(:clj [:#doctype "html"])}} {:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :comment :content ["comment"]} {:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]} " " {:type :element, :attrs {:id "so", :href "bar"}, :tag :a, :content ["bar"]} {:type :element, :attrs {:src "blah.js"}, :tag :script, :content ["alert(\"hi\");"]}]}]}]} (as-hickory (parse "foo bar"))))) ;; Want to test a document fragment that has multiple nodes with no parent, ;; as well as a text node between nodes. (deftest basic-document-fragment (is (= [[:a {:href "foo"} "foo"] " " [:a {:href "bar"} "bar"]] (map as-hiccup (parse-fragment "foo bar")))) (is (= [{:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]} " " {:type :element, :attrs {:href "bar"}, :tag :a, :content ["bar"]}] (map as-hickory (parse-fragment "foo bar"))))) (deftest unencoded-text-nodes ;; Hiccup versions - Note that hiccup representation does not html-escape any ;; strings that aren't attribute values, so the hiccup representation will ;; have the string contents html-escaped. (is (= [[:html {} [:head {}] [:body {} [:p {} "ABC&\n\nDEF."]]]] (as-hiccup (parse "

ABC&\n\nDEF.

")))) ;;
 tag preserves whitespace.
  (is (= [[:html {} [:head {}] [:body {} [:pre {} "ABC&\n\nDEF."]]]]
         (as-hiccup (parse "
ABC&\n\nDEF.
")))) ;; Hickory versions - Note that the representation is different, and Hickory ;; format does not keep HTML escaped in its representation, as it can ;; figure out what to escape at render time. (is (= "ABC&\n\nDEF." (get-in (as-hickory (parse "

ABC&\n\nDEF.

")) [:content 0 :content 1 :content 0 :content 0]))) ;;
 tag preserves whitespace.
  (is (= "ABC&\n\nDEF."
         (get-in (as-hickory (parse "
ABC&\n\nDEF.
")) [:content 0 :content 1 :content 0 :content 0])))) ;; Issue #50: Tests that the parser does not throw a StackOverflowError when ;; parsing a document with deeply nested HTML tags. ;; I don't have time for this #_(deftest deeply-nested-tags (let [jsoup (parse (apply str (repeat 2048 "abc"))) r1 (get-in (vec (as-hiccup jsoup)) (concat [0 3 2] (repeat 2047 3))) r2 (get-in (as-hickory jsoup) (apply concat [:content 0 :content 1 :content 0] (repeat 2047 [:content 1])))] (println "R1" r1) (println "R2" r2) (is (= [:font {} "abc"] r1)) (is (= {:type :element :attrs nil :tag :font :content ["abc"]} r2)))) #?(:cljs (deftest node-type-test (is (= 3 (hickory.core/node-type "TEXT"))))) ================================================ FILE: test/cljc/hickory/test/hiccup_utils.cljc ================================================ (ns hickory.test.hiccup-utils (:require [hickory.hiccup-utils :refer [class-names id normalize-form tag-name tag-well-formed?]] [clojure.test :refer [deftest is]])) #?(:clj (deftest first-idx-test (let [first-idx #'hickory.hiccup-utils/first-idx] (is (= -1 (first-idx -1 -1))) (is (= 2 (first-idx -1 2))) (is (= 5 (first-idx 5 -1))) (is (= 3 (first-idx 5 3))) (is (= 3 (first-idx 3 5)))))) (deftest tag-well-formed?-test (is (= true (tag-well-formed? :a))) (is (= true (tag-well-formed? :a#id))) (is (= true (tag-well-formed? :a#id.class))) (is (= true (tag-well-formed? :a.class.class2))) (is (= false (tag-well-formed? ""))) (is (= false (tag-well-formed? ".class"))) (is (= false (tag-well-formed? "a#"))) (is (= false (tag-well-formed? "a#foo."))) (is (= false (tag-well-formed? "a."))) (is (= false (tag-well-formed? "a.foo."))) (is (= false (tag-well-formed? "#id.class"))) (is (= false (tag-well-formed? :a.class#id))) (is (= false (tag-well-formed? :a#id#id2)))) (deftest tag-name-test (is (= "a" (tag-name "a"))) (is (= "a" (tag-name 'a))) (is (= "a" (tag-name :a))) (is (= "b" (tag-name :b.class))) (is (= "b" (tag-name :b#id))) (is (= "b" (tag-name :b.class#id))) (is (= "b" (tag-name :b#id.class)))) (deftest class-names-test (is (= [] (class-names :a))) (is (= [] (class-names :a#foo))) (is (= ["foo"] (class-names "a.foo"))) (is (= ["bar"] (class-names :a#foo.bar))) (is (= ["foo" "bar"] (class-names :a.foo.bar)))) (deftest id-test (is (= nil (id :a))) (is (= nil (id 'a))) (is (= "foo" (id :a#foo))) (is (= "foo" (id :a#foo.bar)))) #?(:clj (deftest expand-content-seqs-test (let [expand-content-seqs #'hickory.hiccup-utils/expand-content-seqs] (is (= [1 2 3] (expand-content-seqs [1 2 3]))) (is (= [1 2 [3]] (expand-content-seqs [1 '(2 [3])]))) ;; Example from docstring. (is (= [1 2 3 2 4 6 [5]] (expand-content-seqs [1 '(2 3) (for [x [1 2 3]] (* x 2)) [5]])))))) #?(:clj (deftest normalize-element-test (let [normalize-element #'hickory.hiccup-utils/normalize-element] (is (= [:a {:id nil :class nil} "Hi"] (normalize-element [:a "Hi"]))) (is (= [:a {:id "foo" :class nil} "Hi"] (normalize-element [:A#foo "Hi"]))) (is (= [:a {:id nil :class "foo"} "Hi"] (normalize-element [:a.foo "Hi"]))) (is (= [:a {:id "foo" :class "bar"} "Hi" "There"] (normalize-element [:a#foo.bar "Hi" "There"]))) (is (= [:a {:id "foo" :class "bar"} "Hi"] (normalize-element [:a.bar {:id "foo"} "Hi"]))) (is (= [:a {:id "foo" :class "bar"}] (normalize-element [:A#bip {:id "foo" :class "bar"}]))) (is (= [:a {:id "foo" :class "bar"}] (normalize-element [:a#bip.baz {:id "foo" :class "bar"}]))) (is (= [:a {:id nil :class "foo bar"}] (normalize-element [:a.foo.bar])))))) (deftest normalize-form-test (is (= [:a {:id nil :class nil}] (normalize-form [:A]))) (is (= [:a {:id nil :class nil :href "localhost"}] (normalize-form [:a {:href "localhost"}]))) (is (= [:a {:id nil :class nil} [:b {:id nil :class nil} "foo"] [:i {:id nil :class nil} "bar"]] (normalize-form [:a [:b "foo"] [:i "bar"]]))) (is (= [:a {:id nil :class nil} [:b {:id nil :class nil} "foo"] [:i {:id nil :class nil} "bar"]] (normalize-form [:a '([:b "foo"] [:i "bar"])]))) (is (= [:a {:id nil :class nil} [:b {:id nil :class nil} "foo" [:i {:id nil :class nil} "bar"]]] (normalize-form [:a [:b "foo" [:i "bar"]]])))) ================================================ FILE: test/cljc/hickory/test/render.cljc ================================================ (ns hickory.test.render (:require [clojure.test :refer [deftest is]] [hickory.core :refer [as-hiccup as-hickory parse parse-fragment]] [hickory.render :refer [hiccup-to-html hickory-to-html]])) ;; ;; Hickory to HTML ;; (deftest hickory-to-html-test (is (= "

hi

" (hickory-to-html (as-hickory (parse "

hi"))))) ;; Make sure void elements don't have closing tags. (is (= "Hi
There" (hickory-to-html (as-hickory (parse "Hi
There"))))) ;; Make sure text is properly escaped. (is (= "<html>" (hickory-to-html (as-hickory (first (parse-fragment "<html>")))))) ;; Make sure the contents of script/style tags do not get html escaped. (is (= "" (hickory-to-html (as-hickory (first (parse-fragment "")))))) ;; Make sure attribute contents are html-escaped. (is (= "" (hickory-to-html (as-hickory (first (parse-fragment ""))))))) (deftest hickory-doctypes-test (is (= "" (hickory-to-html {:type :document-type :attrs {:name "html", :publicid nil :systemid nil}}))) (is (= "" (hickory-to-html {:type :document-type :attrs {:name "html", :publicid "-//W3C//DTD HTML 4.01//EN", :systemid "http://www.w3.org/TR/html4/strict.dtd"}}))) (is (= "" (hickory-to-html (as-hickory (parse ""))))) ;; Apparently Chrome will parse this doctype as plain html5, so we can't ;; do a roundtrip test in cljs. #?(:clj (is (= "" (hickory-to-html (as-hickory (parse ""))))))) (deftest error-handling-test (let [data {:type :foo :tag :a :attrs {:foo "bar"}}] (is (thrown-with-msg? #?(:clj Exception :cljs :default) #"^Not a valid node: nil" (hickory-to-html nil))) (is (thrown-with-msg? #?(:clj Exception :cljs :default) #?(:clj #"^Not a valid node: \{:type :foo, :tag :a, :attrs \{:foo \"bar\"\}\}" :cljs #"^Not a valid node: \{:type :foo, :tag :a\, :attrs \{:foo \"bar\"\}}") (hickory-to-html data))) (is (= data (try (hickory-to-html data) (catch #?(:clj Exception :cljs js/Error) e (:dom (ex-data e)))))))) ;; ;; Hiccup to HTML ;; (deftest hiccup-to-html-test (is (= "

hi

" (hiccup-to-html (as-hiccup (parse "

hi"))))) ;; Make sure void elements don't have closing tags. (is (= "Hi
There" (hiccup-to-html (as-hiccup (parse "Hi
There"))))) ;; Make sure text is properly escaped. (is (= "<html>" (hiccup-to-html [(as-hiccup (first (parse-fragment "<html>")))]))) ;; Make sure the contents of script/style tags do not get html escaped. (is (= "" (hiccup-to-html [(as-hiccup (first (parse-fragment "")))]))) ;; Make sure attribute contents are html-escaped. (is (= "" (hiccup-to-html [(as-hiccup (first (parse-fragment "")))])))) (deftest hiccup-doctypes-test (is (= "" (hiccup-to-html (as-hiccup (parse ""))))) #?(:clj (is (= "" (hiccup-to-html (as-hiccup (parse ""))))))) ================================================ FILE: test/cljc/hickory/test/select.cljc ================================================ (ns hickory.test.select (:require [hickory.core :as hickory] [hickory.select :as select] [hickory.utils :as utils] [hickory.zip :as hzip] [clojure.zip :as zip] [clojure.test :refer [deftest is testing]])) (def html1 "

Heading

Paragraph

Link
Div
Span
") (def html2 "

Paragraph 1

Paragraph 2

Paragraph 3

Paragraph 4

Paragraph 5

Paragraph 6

Paragraph 7

Paragraph 8

") (deftest select-next-loc-test (testing "The select-next-loc function." (let [htree (hickory/as-hickory (hickory/parse html1)) find-comment-fn (fn [zip-loc] (= (:type (zip/node zip-loc)) :comment))] (let [selection (select/select-next-loc find-comment-fn (hzip/hickory-zip htree))] (is (and (= :comment (-> selection zip/node :type)) (re-find #"Comment 1" (-> (zip/node selection) :content first)))) (let [second-selection (select/select-next-loc find-comment-fn (zip/next selection))] (is (and (= :comment (-> second-selection zip/node :type)) (re-find #"Comment 2" (-> (zip/node second-selection) :content first)))) (is (nil? (select/select-next-loc find-comment-fn (zip/next second-selection))))))))) (deftest select-test (testing "The select function." (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (fn [zip-loc] (= (:type (zip/node zip-loc)) :document-type)) htree)] (is (and (= 1 (count selection)) (= :document-type (-> selection first :type))))) (let [selection (select/select (fn [zip-loc] (= (:type (zip/node zip-loc)) :comment)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :comment (:type %)) selection)))))))) ;; ;; Selector tests ;; (deftest node-type-test (testing "node-type selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/node-type :document-type) htree)] (is (and (= 1 (count selection)) (= :document-type (-> selection first :type))))) (let [selection (select/select (select/node-type :comment) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :comment (:type %)) selection)))))))) (deftest tag-test (testing "tag selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/tag "h1") htree)] (is (and (= 1 (count selection)) (= :h1 (-> selection first :tag))))) ;; Case-insensitivity test (let [selection (select/select (select/tag "H1") htree)] (is (and (= 1 (count selection)) (= :h1 (-> selection first :tag))))) ;; Non-string argument test (let [selection (select/select (select/tag :h1) htree)] (is (and (= 1 (count selection)) (= :h1 (-> selection first :tag)))))))) (deftest attr-test (testing "attr selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/attr :disabled) htree)] (is (and (= 1 (count selection)) (= "attrspan" (-> selection first :attrs :id))))) (let [selection (select/select (select/attr "anotherattr") htree)] (is (and (= 1 (count selection)) (= "attrspan" (-> selection first :attrs :id))))) (let [selection (select/select (select/attr :thirdthing #(= "44" %)) htree)] (is (and (= 1 (count selection)) (= "attrspan" (-> selection first :attrs :id))))) ;; Case-insensitivity of names and non-equality predicate test (let [selection (select/select (select/attr "CAPITALIZED" #(utils/starts-with % "UPPER")) htree)] (is (and (= 1 (count selection)) (= "attrspan" (-> selection first :attrs :id))))) ;; Graceful failure to find anything (let [selection (select/select (select/attr "notpresent" #(utils/starts-with % "never")) htree)] (is (= 0 (count selection))))))) (deftest id-test (testing "id selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/id "deepestdiv") htree)] (is (and (= 1 (count selection)) (re-find #"deepestdiv" (-> selection first :attrs :id))))) (let [selection (select/select (select/id "anid") htree)] (is (and (= 1 (count selection)) (re-find #"anid" (-> selection first :attrs :id))))) ;; Case-insensitivity test (let [selection (select/select (select/id "ANID") htree)] (is (and (= 1 (count selection)) (re-find #"anid" (-> selection first :attrs :id))))) ;; Non-string argument test (let [selection (select/select (select/id :anid) htree)] (is (and (= 1 (count selection)) (re-find #"anid" (-> selection first :attrs :id)))))))) (deftest class-test (testing "class selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/class "aclass") htree)] (is (and (= 1 (count selection)) (re-find #"aclass" (-> selection first :attrs :class))))) (let [selection (select/select (select/class "cool") htree)] (is (and (= 3 (count selection)) (every? #(not (nil? %)) (map #(re-find #"cool" (-> % :attrs :class)) selection))))) ;; Case-insensitivity test (let [selection (select/select (select/class "Aclass") htree)] (is (and (= 1 (count selection)) (re-find #"aclass" (-> selection first :attrs :class))))) ;; Non-string argument test (let [selection (select/select (select/class :aclass) htree)] (is (and (= 1 (count selection)) (re-find #"aclass" (-> selection first :attrs :class))))) ;; class followed by line feed (let [selection (select/select (select/class :line-feed-ahead) htree)] (is (and (= 1 (count selection)) (re-find #"line-feed-ahead" (-> selection first :attrs :class)))))))) (deftest any-test (testing "any selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select select/any htree)] (is (= 10 (count selection))))))) (deftest element-child-test (testing "element-child selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select select/element-child htree)] (is (= 9 (count selection))))))) (deftest root-test (testing "root selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select select/root htree)] (is (= :html (-> selection first :tag))))))) (deftest find-in-text-test (testing "find-in-text selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/find-in-text #"Heading") htree)] (is (and (= 1 (count selection)) (= :h1 (-> selection first :tag))))) (let [selection (select/select (select/find-in-text #"Div") htree)] (is (and (= 1 (count selection)) (= :div (-> selection first :tag))))) (let [selection-locs (select/select-locs (select/child (select/tag :body) (select/find-in-text #"Paragraph")) htree) selection (mapv zip/node selection-locs)] (is (and (= 1 (count selection)) (= :p (-> selection first :tag)) (= :body (-> selection-locs first zip/up zip/node :tag)))))) (let [htree (hickory/as-hickory (hickory/parse html2))] (let [selection (select/select (select/find-in-text #"Paragraph") htree)] (is (and (= 8 (count selection)) (every? #(= :p %) (map :tag selection)))))))) (deftest n-moves-until-test (testing "n-moves-until selector" ;; This function is actually pretty well exercised by nth-child, etc. (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/and (select/tag :div) (select/n-moves-until 0 6 zip/up nil?)) htree)] (is (= "deepestdiv" (-> selection first :attrs :id))))))) (deftest nth-of-type-test (testing "nth-of-type selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/nth-of-type 1 :body) htree)] (is (and (= 1 (count selection)) (= :body (:tag (first selection))))))))) (deftest nth-last-of-type-test (testing "nth-last-of-type selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/nth-last-of-type 1 :span) htree)] (is (and (= 1 (count selection)) (= "anid" (-> selection first :attrs :id)))))))) (deftest nth-child-test (testing "nth-child selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/and (select/tag :div) (select/nth-child 0 1)) htree)] (is (and (= 1 (count selection)) (= "deepestdiv" (-> selection first :attrs :id))))) (let [selection (select/select (select/and (select/tag :div) (select/nth-child 1 1)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection))))) (let [selection (select/select (select/and (select/tag :div) (select/nth-child :odd)) htree)] (is (and (= 1 (count selection)) (= "deepestdiv" (-> selection first :attrs :id))))) (let [selection (select/select (select/and (select/node-type :element) (select/nth-child :even)) htree)] (is (and (= 4 (count selection)) (= :element (-> selection first :type)))))) (let [htree (hickory/as-hickory (hickory/parse html2))] (let [selection (select/select (select/and (select/node-type :element) (select/nth-child :even)) htree)] (is (and (= 5 (count selection)) (every? true? (map #(contains? #{:body :p} (:tag %)) selection))))) (let [selection (select/select (select/nth-child 3 0) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :p (:tag %)) selection))))) (let [selection (select/select (select/child (select/tag :body) (select/nth-child 3 1)) htree)] (is (and (= 3 (count selection)) (every? true? (map #(= :p (:tag %)) selection)))))))) (deftest nth-last-child-test (testing "nth-last-child selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/and (select/tag :div) (select/nth-last-child 0 1)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection))))) (let [selection (select/select (select/and (select/tag :div) (select/nth-last-child 1 1)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection))))) (let [selection (select/select (select/and (select/tag :div) (select/nth-last-child :odd)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection))))) (let [selection (select/select (select/and (select/node-type :element) (select/nth-last-child :even)) htree)] (is (and (= 4 (count selection)) (= :element (-> selection first :type)))))) (let [htree (hickory/as-hickory (hickory/parse html2))] (let [selection (select/select (select/and (select/node-type :element) (select/nth-last-child :even)) htree)] (is (and (= 5 (count selection)) (every? true? (map #(contains? #{:head :p} (:tag %)) selection))))) (let [selection (select/select (select/nth-last-child 3 0) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :p (:tag %)) selection))))) (let [selection (select/select (select/child (select/tag :body) (select/nth-last-child 3 1)) htree)] (is (and (= 3 (count selection)) (every? true? (map #(= :p (:tag %)) selection)))))))) (deftest first-child-test (testing "first-child selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/child (select/tag :div) select/first-child) htree)] (is (and (= 1 (count selection)) (= "attrspan" (-> selection first :attrs :id)))))))) (deftest last-child-test (testing "last-child selector" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/child (select/tag :div) select/last-child) htree)] (is (and (= 1 (count selection)) (= "anid" (-> selection first :attrs :id)))))))) ;; ;; Selector Combinators ;; (deftest and-test (testing "and selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/and (select/tag :div)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection))))) (let [selection (select/select (select/and (select/tag :div) (select/class "bclass")) htree)] (is (and (= 1 (count selection)) (re-find #"bclass" (-> selection first :attrs :class))))) (let [selection (select/select (select/and (select/class "cool") (select/tag :span)) htree)] (is (and (= 1 (count selection)) (= "anid" (-> selection first :attrs :id))))) (let [selection (select/select (select/and (select/class "cool") (select/tag :span) (select/id :attrspan)) htree)] (is (= [] selection)))))) (deftest or-test (testing "or selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/or (select/tag :a) (select/class "notpresent") (select/id :nothere)) htree)] (is (and (= 1 (count selection)) (every? true? (map #(= :a (:tag %)) selection))))) (let [selection (select/select (select/or (select/tag :div)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection))))) (let [selection (select/select (select/or (select/id "deepestdiv") (select/class "bclass")) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (:tag %)) selection)))))))) (deftest not-test (testing "not selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/and (select/node-type :element) (select/not (select/class :cool))) htree)] (is (and (= 7 (count selection)) (every? true? (map #(and (= :element (-> % :type)) (or (not (-> % :attrs :class)) (not (re-find #"cool" (-> % :attrs :class))))) selection))))) (let [selection (select/select (select/el-not (select/class :cool)) htree)] (is (and (= 7 (count selection)) (every? true? (map #(and (= :element (-> % :type)) (or (not (-> % :attrs :class)) (not (re-find #"cool" (-> % :attrs :class))))) selection)))))))) (deftest ordered-adjacent-test (testing "ordered-adjacent selector combinator" ;; This is pretty well tested by the tests for child and others. (let [htree (hickory/as-hickory (hickory/parse html1))] ;; Select body tag that is just after a head tag. (let [selection (select/select (select/ordered-adjacent #(select/left-of-node-type % :element) (select/tag :body) (select/tag :head)) htree)] (is (and (= 1 (count selection)) (= :body (-> selection first :tag)))))))) (deftest child-test (testing "child selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/child (select/el-not select/any)) htree)] (is (= [] selection))) (let [selection (select/select (select/child (select/tag :html) (select/tag :div) (select/tag :span)) htree)] (is (= [] selection))) (let [selection (select/select (select/child (select/tag :body) (select/tag :div) (select/tag :span)) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :span (:tag %)) selection))))) (let [selection (select/select (select/child (select/tag :div) select/any) htree)] (is (and (= 2 (count selection)) (every? true? (map #(or (= :span (-> % :tag)) (= :div (-> % :tag))) selection)))))) ;; Check examples from the doc string. (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/child (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (= :input (-> selection first :tag))))) (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/child (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (= [] selection)))))) (deftest has-child-test (testing "has-child selector combinator" (let [docs ["
" "
" "
"]] (doseq [doc docs] (let [htree (-> doc hickory/parse hickory/as-hickory)] (let [selection (select/select (select/has-child (select/id :innermost)) htree)] (is (and (= 1 (count selection)) (every? true? (map #(= :div (-> % :tag)) selection))))) ;; Check that a descendant selector can peer up past the ;; node having its descendants examined. (let [selection (select/select (select/has-child (select/descendant (select/id :outermost) (select/id :innermost))) htree)] (is (and (= 1 (count selection)) (every? true? (map #(= :div (-> % :tag)) selection))))) (let [selection (select/select (select/has-child (select/tag :a)) htree)] (is (= [] selection)))))))) (deftest parent-test (testing "parent selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/parent (select/el-not select/any)) htree)] (is (= [] selection))) (let [selection (select/select (select/parent (select/tag :html) (select/tag :div) (select/tag :span)) htree)] (is (= [] selection))) (let [selection (select/select (select/parent (select/tag :body) (select/tag :div) (select/tag :span)) htree)] (is (and (= 1 (count selection)) (every? true? (map #(= :body (:tag %)) selection))))) (let [selection (select/select (select/parent (select/tag :div) select/any) htree)] (is (and (= 1 (count selection)) (every? true? (map #(= :div (-> % :tag)) selection))))) ;; Find any element that is a parent of another element (let [selection (select/select (select/parent select/any select/any) htree)] (is (and (= 4 (count selection)) (every? true? (mapv #(or (= :html (-> % :tag)) (= :body (-> % :tag)) (= :div (-> % :tag)) (= :span (-> % :tag))) selection)))))) ;; Check examples from the doc string. (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/parent (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (= :div (-> selection first :tag))))) (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/parent (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (= [] selection)))))) (deftest follow-adjacent-test (testing "follow-adjacent selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/follow-adjacent (select/tag :head) (select/tag :body)) htree)] (is (and (= 1 (count selection)) (= :body (-> selection first :tag)))))) ;; Check the examples from the doc string. (let [htree (-> "
...
..." hickory/parse hickory/as-hickory)] (let [selection (select/select (select/follow-adjacent (select/tag :div) (select/class "foo")) htree)] (is (= :span (-> selection first :tag))))) (let [htree (-> "
...
......" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/follow-adjacent (select/tag :div) (select/class "foo")) htree)] (is (= [] selection)))))) (deftest precede-adjacent-test (testing "precede-adjacent selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/precede-adjacent (select/tag :head) (select/tag :body)) htree)] (is (and (= 1 (count selection)) (= :head (-> selection first :tag)))))) ;; Check the examples from the doc string. (let [htree (-> "
...
..." hickory/parse hickory/as-hickory)] (let [selection (select/select (select/precede-adjacent (select/tag :div) (select/class "foo")) htree)] (is (and (= 1 (count selection)) (= :div (-> selection first :tag)))))) (let [htree (-> "
...
......" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/precede-adjacent (select/tag :div) (select/class "foo")) htree)] (is (= [] selection)))))) (deftest ordered-test ;; Just a basic tire kick here, it gets exercised by descendant, follow, and ;; precede. (testing "ordered selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/ordered #(select/left-of-node-type % :element) (select/tag :body) (select/tag :head)) htree)] (is (and (= 1 (count selection)) (= :body (-> selection first :tag)))))))) (deftest descendant-test (testing "descendant selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/descendant (select/tag :h1)) htree)] (is (and (= 1 (count selection)) (= :h1 (-> selection first :tag))))) (let [selection (select/select (select/descendant (select/class "cool") (select/tag :div)) htree)] (is (= 1 (count selection)) (= "deepestdiv" (-> selection first :attrs :id)))) (let [selection (select/select (select/descendant (select/tag :div) select/any) htree)] (is (= 3 (count selection))))) ;; Check examples from doc string. (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/descendant (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (and (= 1 (count selection)) (= :input (-> selection first :tag)))))) (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/descendant (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (and (= 1 (count selection)) (= :input (-> selection first :tag)))))))) (deftest has-descendant-test (testing "has-descendant selector combinator" (let [docs ["
" "
" "
"]] (doseq [doc docs] (let [htree (-> doc hickory/parse hickory/as-hickory)] (let [selection (select/select (select/and (select/tag :div) (select/has-descendant (select/id :innermost))) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (-> % :tag)) selection))))) ;; Check that a descendant selector can peer up past the ;; node having its descendants examined. (let [selection (select/select (select/and (select/tag :div) (select/has-descendant (select/descendant (select/id :outermost) (select/tag :span)))) htree)] (is (and (= 2 (count selection)) (every? true? (map #(= :div (-> % :tag)) selection))))) (let [selection (select/select (select/has-descendant (select/tag :a)) htree)] (is (= [] selection)))))))) (deftest ancestor-test (testing "ancestor selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/ancestor (select/tag :h1)) htree)] (is (and (= 1 (count selection)) (= :h1 (-> selection first :tag))))) (let [selection (select/select (select/ancestor (select/class "cool") (select/tag :div)) htree)] (is (= 1 (count selection)) (= "deepestdiv" (-> selection first :attrs :id)))) (let [selection (select/select (select/ancestor (select/tag :div) select/any) htree)] (is (= 1 (count selection)))) (let [selection (select/select (select/ancestor (select/tag :span)) htree)] (is (= 2 (count selection)))) ;; Find any element that is a parent of another element (let [selection (select/select (select/parent select/any select/any) htree)] (is (and (= 4 (count selection)) (every? true? (mapv #(or (= :html (-> % :tag)) (= :body (-> % :tag)) (= :div (-> % :tag)) (= :span (-> % :tag))) selection)))))) ;; Check examples from doc string. (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/ancestor (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (and (= 1 (count selection)) (= :div (-> selection first :tag)))))) (let [htree (-> "
" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/ancestor (select/tag :div) (select/class :foo) (select/attr :disabled)) htree)] (is (and (= 1 (count selection)) (= :div (-> selection first :tag)))))))) (deftest follow-test (testing "follow selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/follow (select/tag :head) (select/tag :body)) htree)] (is (and (= 1 (count selection)) (= :body (-> selection first :tag)))))) ;; Check the examples from the doc string. (let [htree (-> "
...
..." hickory/parse hickory/as-hickory)] (let [selection (select/select (select/follow (select/tag :div) (select/class "foo")) htree)] (is (= :span (-> selection first :tag))))) (let [htree (-> "
...
......" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/follow (select/tag :div) (select/class "foo")) htree)] (is (= :span (-> selection first :tag))))))) (deftest precede-test (testing "precede selector combinator" (let [htree (hickory/as-hickory (hickory/parse html1))] (let [selection (select/select (select/precede (select/tag :head) (select/tag :body)) htree)] (is (and (= 1 (count selection)) (= :head (-> selection first :tag)))))) ;; Check the examples from the doc string. (let [htree (-> "
...
..." hickory/parse hickory/as-hickory)] (let [selection (select/select (select/precede (select/tag :div) (select/class "foo")) htree)] (is (and (= 1 (count selection)) (= :div (-> selection first :tag)))))) (let [htree (-> "
...
......" hickory/parse hickory/as-hickory)] (let [selection (select/select (select/precede (select/tag :div) (select/class "foo")) htree)] (is (= :div (-> selection first :tag))))))) (deftest graceful-boundaries-test ;; Testing some problematic expressions to make sure they gracefully ;; return empty results. (let [hick (-> (hickory/parse-fragment "") first hickory/as-hickory)] (is (= [] (select/select (select/child (select/follow-adjacent (select/tag :a) (select/tag :img))) hick))) (is (= [] (select/select (select/child (select/follow-adjacent (select/tag :nonexistent) (select/tag :img))) hick))) (is (= [] (select/select (select/child (select/follow-adjacent (select/tag :a) (select/tag :nonexistent))) hick))) (is (= [{:type :element, :attrs {:href ""}, :tag :img, :content nil}] (select/select (select/child select/first-child) hick))) (is (= [{:type :element, :attrs {:href ""}, :tag :img, :content nil}] (select/select (select/child select/last-child) hick))))) ================================================ FILE: test/cljc/hickory/test/zip.cljc ================================================ (ns hickory.test.zip (:require [clojure.zip :as zip] [hickory.core :refer [as-hiccup as-hickory parse]] [hickory.zip :refer [hickory-zip hiccup-zip]] [clojure.test :refer [deftest is]])) (deftest hickory-zipper (is (= {:type :document, :content [{:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs nil, :tag :a, :content nil}]}]}]} (zip/node (hickory-zip (as-hickory (parse "")))))) (is (= {:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs nil, :tag :a, :content nil}]}]} (-> (hickory-zip (as-hickory (parse ""))) zip/next zip/node))) (is (= {:type :element, :attrs nil, :tag :head, :content nil} (-> (hickory-zip (as-hickory (parse ""))) zip/next zip/next zip/node))) (is (= {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs nil, :tag :a, :content nil}]} (-> (hickory-zip (as-hickory (parse ""))) zip/next zip/next zip/next zip/node))) (is (= {:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs nil, :tag :a, :content nil}]}]} (-> (hickory-zip (as-hickory (parse ""))) zip/next zip/next zip/next zip/up zip/node)))) (deftest hiccup-zipper (is (= '([:html {} [:head {}] [:body {} [:a {}]]]) (zip/node (hiccup-zip (as-hiccup (parse "")))))) (is (= [:html {} [:head {}] [:body {} [:a {}]]] (-> (hiccup-zip (as-hiccup (parse ""))) zip/next zip/node))) (is (= [:head {}] (-> (hiccup-zip (as-hiccup (parse ""))) zip/next zip/next zip/node))) (is (= [:body {} [:a {}]] (-> (hiccup-zip (as-hiccup (parse ""))) zip/next zip/next zip/next zip/node))) (is (= [:html {} [:head {}] [:body {} [:a {}]]] (-> (hiccup-zip (as-hiccup (parse ""))) zip/next zip/next zip/next zip/up zip/node)))) ================================================ FILE: test/cljs/hickory/advanced.edn ================================================ {:optimizations :advanced} ================================================ FILE: test/cljs/hickory/doo_runner.cljs ================================================ (ns hickory.doo-runner (:require [doo.runner :refer-macros [doo-tests]] [hickory.test.convert] [hickory.test.core] [hickory.test.hiccup-utils] [hickory.test.render] [hickory.test.select] [hickory.test.zip])) (doo-tests 'hickory.test.core 'hickory.test.convert 'hickory.test.hiccup-utils 'hickory.test.render 'hickory.test.select 'hickory.test.zip)