Repository: clj-commons/hickory
Branch: master
Commit: 051c94c552b1
Files: 30
Total size: 172.3 KB
Directory structure:
gitextract_ux8kf796/
├── .circleci/
│ └── config.yml
├── .clj-kondo/
│ ├── babashka/
│ │ └── fs/
│ │ └── config.edn
│ └── rewrite-clj/
│ └── rewrite-clj/
│ └── config.edn
├── .github/
│ ├── CODEOWNERS
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── API.md
├── LICENSE
├── ORIGINATOR
├── README.md
├── bb.edn
├── deps.edn
├── package.json
├── project.clj
├── src/
│ ├── clj/
│ │ └── hickory/
│ │ └── core.clj
│ ├── cljc/
│ │ └── hickory/
│ │ ├── convert.cljc
│ │ ├── hiccup_utils.cljc
│ │ ├── render.cljc
│ │ ├── select.cljc
│ │ ├── utils.cljc
│ │ └── zip.cljc
│ └── cljs/
│ └── hickory/
│ └── core.cljs
└── test/
├── cljc/
│ └── hickory/
│ └── test/
│ ├── convert.cljc
│ ├── core.cljc
│ ├── hiccup_utils.cljc
│ ├── render.cljc
│ ├── select.cljc
│ └── zip.cljc
└── cljs/
└── hickory/
├── advanced.edn
└── doo_runner.cljs
================================================
FILE CONTENTS
================================================
================================================
FILE: .circleci/config.yml
================================================
# Clojure CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-clojure/ for more details
#
version: 2.1
workflows:
build-deploy:
jobs:
- build:
filters:
tags:
only: /.*/
- deploy:
requires:
- build
filters:
tags:
only: /Release-.*/
context:
- CLOJARS_DEPLOY
jobs:
build:
docker:
# specify the version you desire here
- image: cimg/clojure:1.11.1-browsers
# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
# documented at https://circleci.com/docs/2.0/circleci-images/
# - image: circleci/postgres:9.4
working_directory: ~/repo
environment:
LEIN_ROOT: "true"
# Customize the JVM maximum heap limit
JVM_OPTS: -Xmx3200m
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "project.clj" }}
# fallback to using the latest cache if no exact match is found
- v1-dependencies-
- run:
name: Install babashka
command: |
curl -s https://raw.githubusercontent.com/borkdude/babashka/master/install -o install.sh
sudo bash install.sh
rm install.sh
- run:
name: install karma
command: |
sudo npm install -g karma-cli
sudo npm install
- run: lein deps
- save_cache:
paths:
- ~/.m2
key: v1-dependencies-{{ checksum "project.clj" }}
# run tests!
- run: bb test:clj
- run: bb test:cljs
deploy:
docker:
- image: cimg/clojure:1.11.1-browsers
working_directory: ~/repo
environment:
LEIN_ROOT: "true"
# Customize the JVM maximum heap limit
JVM_OPTS: -Xmx3200m
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "project.clj" }}
# fallback to using the latest cache if no exact match is found
- v1-dependencies-
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "project.clj" }}
# fallback to using the latest cache if no exact match is found
- v1-dependencies-
- run:
name: Install babashka
command: |
curl -s https://raw.githubusercontent.com/borkdude/babashka/master/install -o install.sh
sudo bash install.sh
rm install.sh
- run:
name: Install deployment-script
command: |
curl -s https://raw.githubusercontent.com/clj-commons/infra/main/deployment/circle-maybe-deploy.bb -o circle-maybe-deploy.bb
chmod a+x circle-maybe-deploy.bb
- run: lein deps
- run:
name: Setup GPG signing key
command: |
GNUPGHOME="$HOME/.gnupg"
export GNUPGHOME
mkdir -p "$GNUPGHOME"
chmod 0700 "$GNUPGHOME"
echo "$GPG_KEY" \
| base64 --decode --ignore-garbage \
| gpg --batch --allow-secret-key-import --import
gpg --keyid-format LONG --list-secret-keys
- save_cache:
paths:
- ~/.m2
key: v1-dependencies-{{ checksum "project.clj" }}
- run:
name: Deploy
command: |
GPG_TTY=$(tty)
export GPG_TTY
echo $GPG_TTY
./circle-maybe-deploy.bb lein deploy clojars
================================================
FILE: .clj-kondo/babashka/fs/config.edn
================================================
{:lint-as {babashka.fs/with-temp-dir clojure.core/let}}
================================================
FILE: .clj-kondo/rewrite-clj/rewrite-clj/config.edn
================================================
{:lint-as
{rewrite-clj.zip/subedit-> clojure.core/->
rewrite-clj.zip/subedit->> clojure.core/->>
rewrite-clj.zip/edit-> clojure.core/->
rewrite-clj.zip/edit->> clojure.core/->>}}
================================================
FILE: .github/CODEOWNERS
================================================
* @port19x
================================================
FILE: .github/workflows/ci.yml
================================================
name: ci
on: [push, pull_request]
jobs:
clojure:
strategy:
matrix:
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
# It is important to install java before installing clojure tools which needs java
# exclusions: babashka, clj-kondo and cljstyle
- name: Prepare java
uses: actions/setup-java@v3
with:
distribution: 'zulu'
java-version: '8'
- name: Install clojure tools
uses: DeLaGuardo/setup-clojure@10.0
with:
bb: latest
lein: latest
# Optional step:
- name: Cache clojure dependencies
uses: actions/cache@v3
with:
path: |
~/.m2/repository
~/.gitlibs
~/.deps.clj
# List all files containing dependencies:
key: cljdeps-${{ hashFiles('deps.edn') }}
# key: cljdeps-${{ hashFiles('deps.edn', 'bb.edn') }}
# key: cljdeps-${{ hashFiles('project.clj') }}
# key: cljdeps-${{ hashFiles('build.boot') }}
restore-keys: cljdeps-
- name: Execute clj tests
run: bb test:clj
- name: Execute cljs tests
run: |
bb test:cljs-npm-install
bb test:cljs -c test/cljs/hickory/advanced.edn
================================================
FILE: .gitignore
================================================
/target
/lib
/classes
/checkouts
/codox-out
pom.xml
*.jar
*.class
.lein-deps-sum
.lein-failures
.lein-plugins
.lein-repl-history
out/
.cpcache
.cache
.clj-kondo/.cache
.lsp/.cache
.portal/vs-code.edn
cljs-test-runner-out
node_modules
package-lock.json
================================================
FILE: API.md
================================================
# Table of contents
- [`hickory.core`](#hickory.core)
- [`Attribute`](#hickory.core/attribute)
- [`Comment`](#hickory.core/comment)
- [`Document`](#hickory.core/document)
- [`DocumentType`](#hickory.core/documenttype)
- [`Element`](#hickory.core/element)
- [`HiccupRepresentable`](#hickory.core/hiccuprepresentable) - Objects that can be represented as Hiccup nodes implement this protocol in order to make the conversion.
- [`HickoryRepresentable`](#hickory.core/hickoryrepresentable) - Objects that can be represented as HTML DOM node maps, similar to clojure.xml, implement this protocol to make the conversion.
- [`Text`](#hickory.core/text)
- [`as-hiccup`](#hickory.core/as-hiccup) - Converts the node given into a hiccup-format data structure.
- [`as-hickory`](#hickory.core/as-hickory) - Converts the node given into a hickory-format data structure.
- [`extract-doctype`](#hickory.core/extract-doctype)
- [`format-doctype`](#hickory.core/format-doctype)
- [`node-type`](#hickory.core/node-type)
- [`parse`](#hickory.core/parse) - Parse an entire HTML document into a DOM structure that can be used as input to as-hiccup or as-hickory.
- [`parse-dom-with-domparser`](#hickory.core/parse-dom-with-domparser)
- [`parse-dom-with-write`](#hickory.core/parse-dom-with-write) - Parse an HTML document (or fragment) as a DOM using document.implementation.createHTMLDocument and document.write.
- [`parse-fragment`](#hickory.core/parse-fragment) - Parse an HTML fragment (some group of tags that might be at home somewhere in the tag hierarchy under <body>) into a list of DOM elements that can each be passed as input to as-hiccup or as-hickory.
- [`remove-el`](#hickory.core/remove-el)
-----
# <a name="hickory.core">hickory.core</a>
## <a name="hickory.core/attribute">`Attribute`</a>
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L52-L52">Source</a></sub></p>
## <a name="hickory.core/comment">`Comment`</a>
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L53-L53">Source</a></sub></p>
## <a name="hickory.core/document">`Document`</a>
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L54-L54">Source</a></sub></p>
## <a name="hickory.core/documenttype">`DocumentType`</a>
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L55-L55">Source</a></sub></p>
## <a name="hickory.core/element">`Element`</a>
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L56-L56">Source</a></sub></p>
## <a name="hickory.core/hiccuprepresentable">`HiccupRepresentable`</a>
Objects that can be represented as Hiccup nodes implement this protocol in
order to make the conversion.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L12-L18">Source</a></sub></p>
## <a name="hickory.core/hickoryrepresentable">`HickoryRepresentable`</a>
Objects that can be represented as HTML DOM node maps, similar to
clojure.xml, implement this protocol to make the conversion.
Each DOM node will be a map or string (for Text/CDATASections). Nodes that
are maps have the appropriate subset of the keys
:type - [:comment, :document, :document-type, :element]
:tag - node's tag, check :type to see if applicable
:attrs - node's attributes as a map, check :type to see if applicable
:content - node's child nodes, in a vector, check :type to see if
applicable
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L20-L35">Source</a></sub></p>
## <a name="hickory.core/text">`Text`</a>
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L57-L57">Source</a></sub></p>
## <a name="hickory.core/as-hiccup">`as-hiccup`</a>
``` clojure
(as-hiccup this)
```
Function.
Converts the node given into a hiccup-format data structure. The
node must have an implementation of the HiccupRepresentable
protocol; nodes created by parse or parse-fragment already do.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L15-L18">Source</a></sub></p>
## <a name="hickory.core/as-hickory">`as-hickory`</a>
``` clojure
(as-hickory this)
```
Function.
Converts the node given into a hickory-format data structure. The
node must have an implementation of the HickoryRepresentable protocol;
nodes created by parse or parse-fragment already do.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L32-L35">Source</a></sub></p>
## <a name="hickory.core/extract-doctype">`extract-doctype`</a>
``` clojure
(extract-doctype s)
```
Function.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L121-L126">Source</a></sub></p>
## <a name="hickory.core/format-doctype">`format-doctype`</a>
``` clojure
(format-doctype dt)
```
Function.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L62-L69">Source</a></sub></p>
## <a name="hickory.core/node-type">`node-type`</a>
``` clojure
(node-type type)
```
Function.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L37-L50">Source</a></sub></p>
## <a name="hickory.core/parse">`parse`</a>
``` clojure
(parse s)
```
Function.
Parse an entire HTML document into a DOM structure that can be
used as input to as-hiccup or as-hickory.
```klipse
(-> (parse "<a style=\"visibility:hidden\">foo</a><div style=\"color:green\"><p>Hello</p></div>")
as-hiccup)
```
```klipse
(-> (parse "<a style=\"visibility:hidden\">foo</a><div style=\"color:green\"><p>Hello</p></div>")
as-hickory)
```
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L151-L168">Source</a></sub></p>
## <a name="hickory.core/parse-dom-with-domparser">`parse-dom-with-domparser`</a>
``` clojure
(parse-dom-with-domparser s)
```
Function.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L132-L135">Source</a></sub></p>
## <a name="hickory.core/parse-dom-with-write">`parse-dom-with-write`</a>
``` clojure
(parse-dom-with-write s)
```
Function.
Parse an HTML document (or fragment) as a DOM using document.implementation.createHTMLDocument and document.write.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L137-L149">Source</a></sub></p>
## <a name="hickory.core/parse-fragment">`parse-fragment`</a>
``` clojure
(parse-fragment s)
```
Function.
Parse an HTML fragment (some group of tags that might be at home somewhere
in the tag hierarchy under <body>) into a list of DOM elements that can
each be passed as input to as-hiccup or as-hickory.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L170-L175">Source</a></sub></p>
## <a name="hickory.core/remove-el">`remove-el`</a>
``` clojure
(remove-el el)
```
Function.
<p><sub><a href="https://github.com/clj-commons/hickory/blob/master/src/cljs/hickory/core.cljs#L128-L130">Source</a></sub></p>
================================================
FILE: LICENSE
================================================
Source code distributed under the Eclipse Public License - v 1.0:
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE
PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF
THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and
documentation distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and
are distributed by that particular Contributor. A Contribution
'originates' from a Contributor if it was added to the Program by such
Contributor itself or anyone acting on such Contributor's
behalf. Contributions do not include additions to the Program which:
(i) are separate modules of software distributed in conjunction with
the Program under their own license agreement, and (ii) are not
derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents" mean patent claims licensable by a Contributor
which are necessarily infringed by the use or sale of its Contribution
alone or when combined with the Program.
"Program" means the Contributions distributed in accordance with this
Agreement.
"Recipient" means anyone who receives the Program under this
Agreement, including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby
grants Recipient a non-exclusive, worldwide, royalty-free copyright
license to reproduce, prepare derivative works of, publicly display,
publicly perform, distribute and sublicense the Contribution of such
Contributor, if any, and such derivative works, in source code and
object code form.
b) Subject to the terms of this Agreement, each Contributor hereby
grants Recipient a non-exclusive, worldwide, royalty-free patent
license under Licensed Patents to make, use, sell, offer to sell,
import and otherwise transfer the Contribution of such Contributor, if
any, in source code and object code form. This patent license shall
apply to the combination of the Contribution and the Program if, at
the time the Contribution is added by the Contributor, such addition
of the Contribution causes such combination to be covered by the
Licensed Patents. The patent license shall not apply to any other
combinations which include the Contribution. No hardware per se is
licensed hereunder.
c) Recipient understands that although each Contributor grants the
licenses to its Contributions set forth herein, no assurances are
provided by any Contributor that the Program does not infringe the
patent or other intellectual property rights of any other entity. Each
Contributor disclaims any liability to Recipient for claims brought by
any other entity based on infringement of intellectual property rights
or otherwise. As a condition to exercising the rights and licenses
granted hereunder, each Recipient hereby assumes sole responsibility
to secure any other intellectual property rights needed, if any. For
example, if a third party patent license is required to allow
Recipient to distribute the Program, it is Recipient's responsibility
to acquire that license before distributing the Program.
d) Each Contributor represents that to its knowledge it has sufficient
copyright rights in its Contribution, if any, to grant the copyright
license set forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form
under its own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties
and conditions, express and implied, including warranties or
conditions of title and non-infringement, and implied warranties or
conditions of merchantability and fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability
for damages, including direct, indirect, special, incidental and
consequential damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are
offered by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such
Contributor, and informs licensees how to obtain it in a reasonable
manner on or through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained
within the Program.
Each Contributor must identify itself as the originator of its
Contribution, if any, in a manner that reasonably allows subsequent
Recipients to identify the originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain
responsibilities with respect to end users, business partners and the
like. While this license is intended to facilitate the commercial use
of the Program, the Contributor who includes the Program in a
commercial product offering should do so in a manner which does not
create potential liability for other Contributors. Therefore, if a
Contributor includes the Program in a commercial product offering,
such Contributor ("Commercial Contributor") hereby agrees to defend
and indemnify every other Contributor ("Indemnified Contributor")
against any losses, damages and costs (collectively "Losses") arising
from claims, lawsuits and other legal actions brought by a third party
against the Indemnified Contributor to the extent caused by the acts
or omissions of such Commercial Contributor in connection with its
distribution of the Program in a commercial product offering. The
obligations in this section do not apply to any claims or Losses
relating to any actual or alleged intellectual property
infringement. In order to qualify, an Indemnified Contributor must: a)
promptly notify the Commercial Contributor in writing of such claim,
and b) allow the Commercial Contributor tocontrol, and cooperate with
the Commercial Contributor in, the defense and any related settlement
negotiations. The Indemnified Contributor may participate in any such
claim at its own expense.
For example, a Contributor might include the Program in a commercial
product offering, Product X. That Contributor is then a Commercial
Contributor. If that Commercial Contributor then makes performance
claims, or offers warranties related to Product X, those performance
claims and warranties are such Commercial Contributor's responsibility
alone. Under this section, the Commercial Contributor would have to
defend claims against the other Contributors related to those
performance claims and warranties, and if a court requires any other
Contributor to pay any damages as a result, the Commercial Contributor
must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS
PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY
WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY
OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely
responsible for determining the appropriateness of using and
distributing the Program and assumes all risks associated with its
exercise of rights under this Agreement , including but not limited to
the risks and costs of program errors, compliance with applicable
laws, damage to or loss of data, programs or equipment, and
unavailability or interruption of operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under
applicable law, it shall not affect the validity or enforceability of
the remainder of the terms of this Agreement, and without further
action by the parties hereto, such provision shall be reformed to the
minimum extent necessary to make such provision valid and enforceable.
If Recipient institutes patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
the Program itself (excluding combinations of the Program with other
software or hardware) infringes such Recipient's patent(s), then such
Recipient's rights granted under Section 2(b) shall terminate as of
the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it
fails to comply with any of the material terms or conditions of this
Agreement and does not cure such failure in a reasonable period of
time after becoming aware of such noncompliance. If all Recipient's
rights under this Agreement terminate, Recipient agrees to cease use
and distribution of the Program as soon as reasonably
practicable. However, Recipient's obligations under this Agreement and
any licenses granted by Recipient relating to the Program shall
continue and survive.
Everyone is permitted to copy and distribute copies of this Agreement,
but in order to avoid inconsistency the Agreement is copyrighted and
may only be modified in the following manner. The Agreement Steward
reserves the right to publish new versions (including revisions) of
this Agreement from time to time. No one other than the Agreement
Steward has the right to modify this Agreement. The Eclipse Foundation
is the initial Agreement Steward. The Eclipse Foundation may assign
the responsibility to serve as the Agreement Steward to a suitable
separate entity. Each new version of the Agreement will be given a
distinguishing version number. The Program (including Contributions)
may always be distributed subject to the version of the Agreement
under which it was received. In addition, after a new version of the
Agreement is published, Contributor may elect to distribute the
Program (including its Contributions) under the new version. Except as
expressly stated in Sections 2(a) and 2(b) above, Recipient receives
no rights or licenses to the intellectual property of any Contributor
under this Agreement, whether expressly, by implication, estoppel or
otherwise. All rights in the Program not expressly granted under this
Agreement are reserved.
This Agreement is governed by the laws of the State of Washington and
the intellectual property laws of the United States of America. No
party to this Agreement will bring a legal action under this Agreement
more than one year after the cause of action arose. Each party waives
its rights to a jury trial in any resulting litigation.
================================================
FILE: ORIGINATOR
================================================
@davidsantiago
================================================
FILE: README.md
================================================
[](https://clojars.org/org.clj-commons/hickory)
[](https://cljdoc.org/d/org.clj-commons/hickory)
[](https://circleci.com/gh/clj-commons/hickory)
# Hickory
Hickory parses HTML into Clojure data structures, so you can analyze,
transform, and output back to HTML. HTML can be parsed into
[hiccup](http://github.com/weavejester/hiccup) vectors, or into a
map-based DOM-like format very similar to that used by clojure.xml. It
can be used from both Clojure and Clojurescript.
## Usage
### Parsing
To start, you will want to process your HTML into a parsed
representation. Once the HTML is in this form, it can be converted to
either Hiccup or Hickory format for further processing. There are two
parsing functions, `parse` and `parse-fragment`. Both take a string
containing HTML and return the parser objects representing the
document. (It happens that these parser objects are Jsoup Documents
and Nodes, but I do not consider this to be an aspect worth preserving
if a change in parser should become necessary).
The first function, `parse` expects an entire HTML document, and
parses it using an HTML5 parser ([Jsoup](http://jsoup.org) on Clojure and
the browser's DOM parser in Clojurescript), which will
fix up the HTML as much as it can into a well-formed document. The
second function, `parse-fragment`, expects some smaller fragment of
HTML that does not make up a full document, and thus returns a list of
parsed fragments, each of which must be processed individually into
Hiccup or Hickory format. For example, if `parse-fragment` is given
"`<p><br>`" as input, it has no common parent for them, so it must
simply give you the list of nodes that it parsed.
These parsed objects can be turned into either Hiccup vector trees or
Hickory DOM maps using the functions `as-hiccup` or `as-hickory`.
Here's a usage example.
```clojure
user=> (use 'hickory.core)
nil
user=> (def parsed-doc (parse "<a href=\"foo\">foo</a>"))
#'user/parsed-doc
user=> (as-hiccup parsed-doc)
([:html {} [:head {}] [:body {} [:a {:href "foo"} "foo"]]])
user=> (as-hickory parsed-doc)
{:type :document, :content [{:type :element, :attrs nil, :tag :html, :content [{:type :element, :attrs nil, :tag :head, :content nil} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]}]}]}]}
user=> (def parsed-frag (parse-fragment "<a href=\"foo\">foo</a> <a href=\"bar\">bar</a>"))
#'user/parsed-frag
user=> (as-hiccup parsed-frag)
IllegalArgumentException No implementation of method: :as-hiccup of protocol: #'hickory.core/HiccupRepresentable found for class: clojure.lang.PersistentVector clojure.core/-cache-protocol-fn (core_deftype.clj:495)
user=> (map as-hiccup parsed-frag)
([:a {:href "foo"} "foo"] " " [:a {:href "bar"} "bar"])
user=> (map as-hickory parsed-frag)
({:type :element, :attrs {:href "foo"}, :tag :a, :content ["foo"]} " " {:type :element, :attrs {:href "bar"}, :tag :a, :content ["bar"]})
```
In the example above, you can see an HTML document that is parsed once
and then converted to both Hiccup and Hickory formats. Similarly, a
fragment is parsed, but it cannot be directly used with `as-hiccup`
(or `as-hickory`), it must have those functions called on each element
in the list instead.
The namespace `hickory.zip` provides
[zippers](https://clojure.github.io/clojure/clojure.zip-api.html) for
both Hiccup and Hickory formatted data, with the functions
`hiccup-zip` and `hickory-zip`. Using zippers, you can easily traverse
the trees in any order you desire, make edits, and get the resulting
tree back. Here is an example of that.
```clojure
user=> (use 'hickory.zip)
nil
user=> (require '[clojure.zip :as zip])
nil
user=> (require '[hickory.render :refer [hickory-to-html]])
nil
user=> (-> (hiccup-zip (as-hiccup (parse "<a href=foo>bar<br></a>"))) zip/node)
([:html {} [:head {}] [:body {} [:a {:href "foo"} "bar" [:br {}]]]])
user=> (-> (hiccup-zip (as-hiccup (parse "<a href=foo>bar<br></a>"))) zip/next zip/node)
[:html {} [:head {}] [:body {} [:a {:href "foo"} "bar" [:br {}]]]]
user=> (-> (hiccup-zip (as-hiccup (parse "<a href=foo>bar<br></a>"))) zip/next zip/next zip/node)
[:head {}]
user=> (-> (hiccup-zip (as-hiccup (parse "<a href=foo>bar<br></a>")))
zip/next zip/next
(zip/replace [:head {:id "a"}])
zip/node)
[:head {:id "a"}]
user=> (-> (hiccup-zip (as-hiccup (parse "<a href=foo>bar<br></a>")))
zip/next zip/next
(zip/replace [:head {:id "a"}])
zip/root)
([:html {} [:head {:id "a"}] [:body {} [:a {:href "foo"} "bar" [:br {}]]]])
user=> (-> (hickory-zip (as-hickory (parse "<a href=foo>bar<br></a>")))
zip/next zip/next
(zip/replace {:type :element :tag :head :attrs {:id "a"} :content nil})
zip/root)
{:type :document, :content [{:type :element, :attrs nil, :tag :html, :content [{:content nil, :type :element, :attrs {:id "a"}, :tag :head} {:type :element, :attrs nil, :tag :body, :content [{:type :element, :attrs {:href "foo"}, :tag :a, :content ["bar" {:type :element, :attrs nil, :tag :br, :content nil}]}]}]}]}
user=> (hickory-to-html *1)
"<html><head id=\"a\"></head><body><a href=\"foo\">bar<br></a></body></html>"
```
In this example, we can see a basic document being parsed into Hiccup
form. Then, using zippers, the HEAD element is navigated to, and then
replaced with one that has an id of "a". The final tree, including the
modification, is also shown using `zip/root`. Then the same
modification is made using Hickory forms and zippers. Finally, the
modified Hickory version is printed back to HTML using the
`hickory-to-html` function.
### Selectors
Hickory also comes with a set of CSS-style selectors that operate on
hickory-format data in the `hickory.select` namespace. These selectors
do not exactly mirror the selectors in CSS, and are often more
powerful. There is no version of these selectors for hiccup-format
data, at this point.
A selector is simply a function that takes a zipper loc from a hickory
html tree data structure as its only argument. The selector will
return its argument if the selector applies to it, and nil
otherwise. Writing useful selectors can often be involved, so most of
the `hickory.select` package is actually made up of selector
combinators; functions that return useful selector functions by
specializing them to the data given as arguments, or by combining
together multiple selectors. For example, if we wanted to figure out
the dates of the next Formula 1 race weekend, we could do something
like this:
```clojure
user=> (use 'hickory.core)
nil
user=> (require '[hickory.select :as s])
nil
user=> (require '[clj-http.client :as client])
nil
user=> (require '[clojure.string :as string])
nil
user=> (def site-htree (-> (client/get "http://formula1.com/default.html") :body parse as-hickory))
#'user/site-htree
user=> (-> (s/select (s/child (s/class "subCalender") ; sic
(s/tag :div)
(s/id :raceDates)
s/first-child
(s/tag :b))
site-htree)
first :content first string/trim)
"10, 11, 12 May 2013"
```
In this example, we get the contents of the homepage and use `select`
to give us any nodes that satisfy the criteria laid out by the
selectors. The selector in this example is overly precise in order to
illustrate more selectors than we need; we could have gotten by just
selecting the contents of the P and then B tags inside the element
with id "raceDates".
Using the selectors allows you to search large HTML documents for nodes of interest with a relatively small amount of code. There are many selectors available in the [`hickory.select`](https://cljdoc.org/d/org.clj-commons/hickory/CURRENT/api/hickory.select) namespace, including:
- `node-type`: Give this function a keyword or string that names the contents of the `:type` field in a hickory node, and it gives you a selector that will select nodes of that type. Example: `(node-type :comment)`
- `tag`: Give this function a keyword or string that names the contents of the `:tag` field in a hickory node, and it gives you a selector that will select nodes with that tag. Example: `(tag :div)`
- `attr`: Give this function a keyword or string that names an attribute in the `:attrs` map of a hickory node, and it gives you a selector that will select nodes whose `:attrs` map contains that key. Give a single-argument function as an additional argument, and the resulting selector function will additionally require the value of that key to be such that the function given as the last argument returns true. Example: `(attr :id #(.startsWith % "foo"))`
- `id`: Give this function a keyword or string that names the `:id` attribute in the `:attrs` map and it will return a selector function that selects nodes that have that id (this comparison is case-insensitive). Example: `(id :raceDates)`
- `class`: Give this function a keyword or string that names a class that the node should have in the `:class` attribute in the `:attrs` map, and it will return a function that selects nodes that have the given class somewhere in their class string. Example: `(class :foo)`
- `any`: This selector takes no arguments, do not invoke it; returns any node that is an element, similarly to CSS's '*' selector.
- `element`: This selector is equivalent to the `any` selector; this alternate name can make it clearer when the intention is to exclude non-element nodes from consideration.
- `root`: This selector takes no arguments and should not be invoked; simply returns the root node (the HTML element).
- `n-moves-until`: This selector returns a selector function that selects its argument if that argument is some distance from a boundary. The first two arguments, `n` and `c` define the counting: it only selects nodes whose distance can be written in the form `nk+c` for some natural number `k`. The distance and boundary are defined by the number of times the zipper-movement function in the third argument is applied before the boundary function in the last argument is true. See doc string for details.
- `nth-of-type`: This selector returns a selector function that selects its argument if that argument is the `(nk+c)`'th child of the given tag type of some parent node for some natural `k`. Optionally, instead of the `n` and `c` arguments, the keywords `:odd` and `:even` can be given.
- `nth-last-of-type`: Just like `nth-of-type` but counts backwards from the last sibling.
- `nth-child`: This selector returns a selector function that selects its argument if that argument is the `(nk+c)`'th child of its parent node for some natural `k`. Instead of the `n` and `c` arguments, the keywords `:odd` and `:even` can be given.
- `nth-last-child`: Just like `nth-last-child` but counts backwards from the last sibling.
- `first-child`: Takes no arguments, do not invoke it; equivalent to `(nth-child 1)`.
- `last-child`: Takes no arguments, do not invoke it; equivalent to `(nth-last-child 1)`.
There are also selector combinators, which take as argument some number of other selectors, and return a new selector that combines them into one larger selector. An example of this is the `child` selector in the example above. Here's a list of some selector combinators in the package (see the [API Documentation](https://cljdoc.org/d/org.clj-commons/hickory) for the full list):
- `and`: Takes any number of selectors, and returns a selector that only selects nodes for which all of the argument selectors are true.
- `or`: Takes any number of selectors, and retrurns a selector that only selects nodes for which at least one of the argument selectors are true.
- `not`: Takes a single selector as argument and returns a selector that only selects nodes that its argument selector does not.
- `el-not`: Takes a single selector as argument and returns a selector that only selects element nodes that its argument selector does not.
- `child`: Takes any number of selectors as arguments and returns a selector that returns true when the zipper location given as the argument is at the end of a chain of direct child relationships specified by the selectors given as arguments.
- `descendant`: Takes any number of selectors as arguments and returns a selector that returns true when the zipper location given as the argument is at the end of a chain of descendant relationships specified by the selectors given as arguments.
We can illustrate the selector combinators by continuing the Formula 1 example above. We suspect, to our dismay, that Sebastian Vettel is leading the championship for the fourth year in a row.
```clojure
user=> (-> (s/select (s/descendant (s/class "subModule")
(s/class "standings")
(s/and (s/tag :tr)
s/first-child)
(s/and (s/tag :td)
(s/nth-child 2))
(s/tag :a))
site-htree)
first :content first string/trim)
"Sebastian Vettel"
```
Our fears are confirmed, Sebastian Vettel is well on his way to a fourth consecutive championship. If you were to inspect the page by hand (as of around May 2013, at least), you would see that unlike the `child` selector we used in the example above, the `descendant` selector allows the argument selectors to skip stages in the tree; we've left out some elements in this descendant relationship. The first table row in the driver standings table is selected with the `and`, `tag` and `first-child` selectors, and then the second `td` element is chosen, which is the element that has the driver's name (the first table element has the driver's standing) inside an `A` element. All of this is dependent on the exact layout of the HTML in the site we are examining, of course, but it should give an idea of how you can combine selectors to reach into a specific node of an HTML document very easily.
Finally, it's worth noting that the `select` function itself returns the hickory zipper nodes it finds. This is most useful for analyzing the contents of nodes. However, sometimes you may wish to examine the area around a node once you've found it. For this, you can use the `select-locs` function, which returns a sequence of hickory zipper locs, instead of the nodes themselves. This will allow you to navigate around the document tree using the zipper functions in `clojure.zip`. If you wish to go further and actually modify the document tree using zipper functions, you should not use `select-locs`. The problem is that it returns a bunch of zipper locs, but once you modify one, the others are out of date and do not see the changes (just as with any other persistent data structure in Clojure). Thus, their presence was useless and possibly confusing. Instead, you should use the `select-next-loc` function to walk through the document tree manually, moving through the locs that satisfy the selector function one by one, which will allow you to make modifications as you go. As with modifying any data structure as you traverse it, you must still be careful that your code does not add the thing it is selecting for, or it could get caught in an infinite loop. Finally, for more specialized selection needs, it should be possible to write custom selection functions that use the selectors and zipper functions without too much work. The functions discussed in this paragraph are very short and simple, you can use them as a guide.
The doc strings for the functions in the [`hickory.select`](https://cljdoc.org/d/org.clj-commons/hickory/CURRENT/api/hickory.select) namespace provide more details on most of these functions.
For more details, see the [API Documentation](https://cljdoc.org/d/org.clj-commons/hickory/).
## Hickory format
Why two formats? It's very easy to see in the example above, Hiccup is
very convenient to use for writing HTML. It has a compact syntax, with
CSS-like shortcuts for specifying classes and ids. It also allows
parts of the vector to be skipped if they are not important.
It's a little bit harder to process data in Hiccup format. First of
all, each form has to be checked for the presence of the attribute
map, and the traversal adjusted accordingly. Raw Hiccup vectors might
also have information about class and id in one of two different
places. Finally, not every piece of an HTML document can be expressed
in Hiccup without resorting to writing HTML in strings. For example,
if you want to put a doctype or comment on your document, it has to be
done as a string in your Hiccup form containing "`<!DOCTYPE html>`" or
"`<!--stuff-->`".
The Hickory format is another data format intended to allow a
roundtrip from HTML as text, into a data structure that is easy to
process and modify, and back into equivalent (but not identical, in
general) HTML. Because it can express all parts of an HTML document in
a parsed form, it is easier to search and modify the structure of the
document.
A Hickory node is either a map or a string. If it is a map, it will
have some subset of the following four keys, depending on the `:type`:
- `:type` - This will be one of `:comment`, `:document`, `:document-type`, `:element`
- `:tag` - A node's tag (for example, `:img`). This will only be present for nodes of type `:element`.
- `:attrs` - A node's attributes, as a map of keywords to values (for example, {:href "/a"}). This will only be present for nodes of type `:element`.
- `:content` - A node's child nodes, in a vector. Only `:comment`, `:document`, and `:element` nodes have children.
Text and CDATA nodes are represented as strings.
This is almost the exact same structure used by
[clojure.xml](https://clojure.github.io/clojure/clojure.xml-api.html),
the only difference being the addition of the `:type` field. Having
this field allows us to process nodes that clojure.xml leaves out of
the parsed data, like doctype and comments.
## Obtaining
To get hickory, add
```clojure
[org.clj-commons/hickory "0.7.3"]
```
to your project.clj, or an equivalent entry for your Maven-compatible build tool.
## ClojureScript support
Hickory works for all web browsers IE9+ (you can find a workaround for IE9 [here](http://stackoverflow.com/questions/9250545/javascript-domparser-access-innerhtml-and-other-properties)).
## Nodejs support
To parse markup on Nodejs, Hickory requires a Node DOM implementation.
Several are available from [npm](https://www.npmjs.com).
Install the npm package or use [lein-npm](https://github.com/RyanMcG/lein-npm).
Here are some alternatives:
- [jsdom](https://www.npmjs.com/package/jsdom) - **Caution:** this will not work if you're using figwheel
```clojure
(set! js/document (.jsdom (cljs.nodejs/require "jsdom")))
```
- [xmldom](https://www.npmjs.com/package/xmldom)
```clojure
(set! js/DOMParser (.-DOMParser (cljs.nodejs/require "xmldom")))
```
## Changes
- Version 0.7.1. Thanks to [Matt Grimm](https://github.com/tkocmathla) for adding the up-pred zipper function.
- Version 0.7.0. Thanks to [Ricardo J. Méndez](https://github.com/ricardojmendez) for the following updates.
* Removed dependency on cljx, since it was deprecated in June 2015.
* Converted all files and conditionals to cljc.
* Moved tests to cljs.test with doo, since cemerick.test was deprecated over a year ago.
* Updated Clojure and ClojureScript dependencies to avoid conflicts.
* Updated JSoup to 1.9.2, which should bring improved parsing performance.
- Released version 0.6.0.
* Updated JSoup to version 1.8.3. This version of JSoup contains bug fixes, but slightly changes the way it
handles HTML: some parses and output might have different case than before. HTML is still case-insensitive,
of course, but Hickory minor version has been increased just in case. API and semantics are otherwise unchanged.
- Released version 0.5.4.
* Fixed project dependencies so ClojureScript is moved to a dev-dependency.
- Released version 0.5.3.
* Minor bug fix to accommodate ClojureScript's new type hinting support.
- Released version 0.5.2.
* Updates the Clojurescript version to use the latest version of Clojurescript (0.0-1934).
- Released version 0.5.1.
* Added `has-child` and `has-descendant` selectors. Be careful with `has-descendant`, as it must do a full subtree search on each node, which is not fast.
- Released version 0.5.0.
* Now works in Clojurescript as well, huge thanks to [Julien Eluard](https://github.com/jeluard) for doing the heavy lifting on this.
* Reorganized parts of the API into more granular namespaces for better organization.
* Added functions to convert between Hiccup and Hickory format; note that this conversion is not always exact or roundtripable, and can cause a full HTML reparse.
* Added new selector, `element-child`, which selects element nodes that are the child of another element node.
* Numerous bug fixes and improvements.
- Released version 0.4.1, which adds a number of new selectors and selector combinators, including `find-in-text`, `precede-adjacent`, `follow-adjacent`, `precede` and `follow`.
- Released version 0.4.0. Adds the `hickory.select` namespace with many helpful functions for searching through hickory-format HTML documents for specific nodes.
- Released version 0.3.0. Provides a more helpful error message when hickory-to-html has an error. Now requires Clojure 1.4.
- Released version 0.2.3. Fixes a bug where hickory-to-html was not html-escaping the values of tag attributes.
- Released version 0.2.2. Fixes a bug where hickory-to-html was improperly html-escaping the contents of script/style tags.
- Released version 0.2.1. This version fixes bugs:
* hickory-to-html now properly escapes text nodes
* text nodes will now preserve whitespace correctly
- Released version 0.2.0. This version adds a second parsed data
format, explained above. To support this, the API for `parse` and
`parse-fragment` has been changed to allow their return values to be
passed to functions `as-hiccup` or `as-hickory` to determine the
final format. Also added are zippers for both Hiccup and Hickory
formats.
## License
Copyright © 2012 David Santiago
Distributed under the Eclipse Public License, the same as Clojure.
================================================
FILE: bb.edn
================================================
{:deps {io.github.borkdude/lein2deps {:git/url "https://github.com/borkdude/lein2deps"
:git/sha "e26edeb114c9d88a5c4d3abb683306588fcaad13"}}
:tasks
{:requires ([babashka.fs :as fs])
test:clj (shell "lein test")
test:cljs-npm-install (do (shell "npm install -g karma-cli")
(shell "npm install"))
test:cljs (do
(fs/delete-tree "cljs-test-runner-out")
(apply clojure "-M:test:cljs:cljs-test-runner" "--env" "chrome-headless" *command-line-args*))
repl:cljs-node (shell "rlwrap bb clojure -M:test:cljs -m cljs.main -re node")
repl:cljs-browser (shell "rlwrap bb clojure -M:test:cljs -m cljs.main")
quickdoc {:extra-deps {io.github.borkdude/quickdoc {:git/tag "v0.2.5", :git/sha "25784ca"}}
:requires ([quickdoc.api :as api])
:task (api/quickdoc {:source-paths ["src/clj" "src/cljs" "src/cljs"]
:git/branch "master"
:github/repo "https://github.com/clj-commons/hickory"})}}
}
================================================
FILE: deps.edn
================================================
{:paths ["src/clj" "src/cljc" "src/cljs"]
:deps
{org.clojure/clojure {:mvn/version "1.11.1"}
org.jsoup/jsoup {:mvn/version "1.22.2"}}
:aliases
{:test {:extra-paths ["test/cljc"]}
:cljs {:extra-deps {org.clojure/clojurescript {:mvn/version "1.11.60"}}}
:cljs-test-runner
{:extra-deps {olical/cljs-test-runner {:mvn/version "3.8.0"}
viebel/codox-klipse-theme {:mvn/version "0.0.1"}}
:extra-paths ["cljs-test-runner-out"]
:main-opts ["-m" "cljs-test-runner.main"
"--env" "chrome-headless"
"--namespace-regex" "hickory.test.*"]}}}
================================================
FILE: package.json
================================================
{
"devDependencies": {
"karma": "^6.4.1",
"karma-chrome-launcher": "^3.1.1",
"karma-cli": "^2.0.0",
"karma-cljs-test": "^0.1.0"
}
}
================================================
FILE: project.clj
================================================
(defproject org.clj-commons/hickory (or (System/getenv "PROJECT_VERSION") "0.7.1")
:description "HTML as Data"
:url "https://github.com/clj-commons/hickory"
:license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"}
:deploy-repositories [["clojars" {:url "https://repo.clojars.org"
:username :env/clojars_username
:password :env/clojars_org_clj_commons_password
:sign-releases true}]]
:dependencies [[org.clojure/clojure "1.11.1"]
[org.clojure/clojurescript "1.11.60" :scope "provided"]
[org.jsoup/jsoup "1.22.2"]]
:source-paths ["src/clj" "src/cljc" "src/cljs"]
:profiles
{:dev {:source-paths ["src/clj" "src/cljc"]}
:test {:source-paths ["src/cljs" "src/cljc" "test/cljc" "test/cljs"]}})
================================================
FILE: src/clj/hickory/core.clj
================================================
(ns hickory.core
(:require [hickory.utils :as utils]
[hickory.zip :as hzip]
[clojure.zip :as zip])
(:import [org.jsoup Jsoup]
[org.jsoup.nodes Attribute Attributes Comment DataNode Document
DocumentType Element TextNode XmlDeclaration]
[org.jsoup.parser Tag Parser]))
(set! *warn-on-reflection* true)
(defn- end-or-recur [as-fn loc data & [skip-child?]]
(let [new-loc (-> loc (zip/replace data) zip/next (cond-> skip-child? zip/next))]
(if (zip/end? new-loc)
(zip/root new-loc)
#(as-fn (zip/node new-loc) new-loc))))
;;
;; Protocols
;;
(defprotocol HiccupRepresentable
"Objects that can be represented as Hiccup nodes implement this protocol in
order to make the conversion."
(as-hiccup [this] [this zip-loc]
"Converts the node given into a hiccup-format data structure. The
node must have an implementation of the HiccupRepresentable
protocol; nodes created by parse or parse-fragment already do."))
(defprotocol HickoryRepresentable
"Objects that can be represented as HTML DOM node maps, similar to
clojure.xml, implement this protocol to make the conversion.
Each DOM node will be a map or string (for Text/CDATASections). Nodes that
are maps have the appropriate subset of the keys
:type - [:comment, :document, :document-type, :element]
:tag - node's tag, check :type to see if applicable
:attrs - node's attributes as a map, check :type to see if applicable
:content - node's child nodes, in a vector, check :type to see if
applicable"
(as-hickory [this] [this zip-loc]
"Converts the node given into a hickory-format data structure. The
node must have an implementation of the HickoryRepresentable protocol;
nodes created by parse or parse-fragment already do."))
(extend-protocol HiccupRepresentable
Attribute
;; Note the attribute value is not html-escaped; see comment for Element.
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this _] [(utils/lower-case-keyword (.getKey this)) (.getValue this)]))
Attributes
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this _] (into {} (map as-hiccup this))))
Comment
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc] (end-or-recur as-hiccup loc (str "<!--" (.getData this) "-->"))))
DataNode
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc] (end-or-recur as-hiccup loc (str this))))
Document
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc] (end-or-recur as-hiccup loc (apply list (.childNodes this)))))
DocumentType
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc]
(end-or-recur as-hiccup loc (utils/render-doctype (.name this)
(.publicId this)
(.systemId this)))))
Element
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc]
;; There is an issue with the hiccup format, which is that it
;; can't quite cover all the pieces of HTML, so anything it
;; doesn't cover is thrown into a string containing the raw
;; HTML. This presents a problem because it is then never the case
;; that a string in a hiccup form should be html-escaped (except
;; in an attribute value) when rendering; it should already have
;; any escaping. Since the HTML parser quite properly un-escapes
;; HTML where it should, we have to go back and un-un-escape it
;; wherever text would have been un-escaped. We do this by
;; html-escaping the parsed contents of text nodes, and not
;; html-escaping comments, data-nodes, and the contents of
;; unescapable nodes.
(let [tag (utils/lower-case-keyword (.tagName this))
children (cond->> (.childNodes this) (utils/unescapable-content tag) (map str))
data (into [] (concat [tag (trampoline as-hiccup (.attributes this))] children))]
(end-or-recur as-hiccup loc data (utils/unescapable-content tag)))))
TextNode
;; See comment for Element re: html escaping.
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc] (end-or-recur as-hiccup loc (utils/html-escape (.getWholeText this)))))
XmlDeclaration
(as-hiccup
([this] (trampoline as-hiccup this (hzip/hiccup-zip this)))
([this loc] (end-or-recur as-hiccup loc (str this)))))
(extend-protocol HickoryRepresentable
Attribute
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this _] [(utils/lower-case-keyword (.getKey this)) (.getValue this)]))
Attributes
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this _] (not-empty (into {} (map as-hickory this)))))
Comment
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this loc] (end-or-recur as-hickory loc {:type :comment
:content [(.getData this)]} true)))
DataNode
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this loc] (end-or-recur as-hickory loc (str this))))
Document
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this loc] (end-or-recur as-hickory loc {:type :document
:content (or (seq (.childNodes this)) nil)})))
DocumentType
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this loc] (end-or-recur as-hickory loc {:type :document-type
:attrs (trampoline as-hickory (.attributes this))})))
Element
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this loc] (end-or-recur as-hickory loc {:type :element
:attrs (trampoline as-hickory (.attributes this))
:tag (utils/lower-case-keyword (.tagName this))
:content (or (seq (.childNodes this)) nil)})))
TextNode
(as-hickory
([this] (trampoline as-hickory this (hzip/hickory-zip this)))
([this loc] (end-or-recur as-hickory loc (.getWholeText this)))))
(defn parse
"Parse an entire HTML document into a DOM structure that can be
used as input to as-hiccup or as-hickory."
[s]
(cond (instance? String s)
(Jsoup/parse ^String s)
(instance? java.io.File s)
(Jsoup/parse ^java.io.File s)
(instance? java.nio.file.Path s)
(Jsoup/parse ^java.nio.file.Path s)
:else
(throw (ex-info "Invalid input for parse" {:type (type s)}))))
(defn parse-fragment
"Parse an HTML fragment (some group of tags that might be at home somewhere
in the tag hierarchy under <body>) into a list of DOM elements that can
each be passed as input to as-hiccup or as-hickory."
[s]
(into [] (Parser/parseFragment s (Element. (Tag/valueOf "body") "") "")))
================================================
FILE: src/cljc/hickory/convert.cljc
================================================
(ns hickory.convert
"Functions to convert from one representation to another."
(:require [hickory.render :as render]
[hickory.core :as core]
[hickory.utils :as utils]))
(defn hiccup-to-hickory
"Given a sequence of hiccup forms representing a full document,
returns an equivalent hickory node representation of that document.
This will perform HTML5 parsing as a full document, no matter what
it is given.
Note that this function is heavyweight: it requires a full HTML
re-parse to work."
[hiccup-forms]
(core/as-hickory (core/parse (render/hiccup-to-html hiccup-forms))))
(defn hiccup-fragment-to-hickory
"Given a sequence of hiccup forms representing a document fragment,
returns an equivalent sequence of hickory fragments.
Note that this function is heavyweight: it requires a full HTML
re-parse to work."
[hiccup-forms]
(map core/as-hickory
(core/parse-fragment (render/hiccup-to-html hiccup-forms))))
(defn hickory-to-hiccup
"Given a hickory format dom object, returns an equivalent hiccup
representation. This can be done directly and exactly, but in general
you will not be able to go back from the hiccup."
[dom]
(if (string? dom)
(utils/html-escape dom)
(case (:type dom)
:document
(mapv hickory-to-hiccup (:content dom))
:document-type
(utils/render-doctype (get-in dom [:attrs :name])
(get-in dom [:attrs :publicid])
(get-in dom [:attrs :systemid]))
:element
(if (utils/unescapable-content (:tag dom))
(if (every? string? (:content dom))
;; Merge :attrs contents with {} to prevent nil from getting into
;; the hiccup forms when there are no attributes.
(apply vector (:tag dom) (into {} (:attrs dom)) (:content dom))
(throw (ex-info
"An unescapable content tag had non-string children."
{:error-location dom})))
(apply vector (:tag dom) (into {} (:attrs dom))
(map hickory-to-hiccup (:content dom))))
:comment
(str "<!--" (apply str (:content dom)) "-->"))))
================================================
FILE: src/cljc/hickory/hiccup_utils.cljc
================================================
(ns hickory.hiccup-utils
"Utilities for working with hiccup forms."
(:require [clojure.string :as str]))
(defn- first-idx
"Given two possible indexes, returns the lesser that is not -1. If both
are -1, then -1 is returned. Useful for searching strings for multiple
markers, as many routines will return -1 for not found.
Examples: (first-idx -1 -1) => -1
(first-idx -1 2) => 2
(first-idx 5 -1) => 5
(first-idx 5 3) => 3"
#?(:clj [^long a ^long b]
:cljs [a b])
(if (== a -1)
b
(if (== b -1)
a
(min a b))))
(defn- index-of
([^String s c]
#?(:clj (.indexOf s (int c))
:cljs (.indexOf s c)))
([^String s c idx]
#?(:clj (.indexOf s (int c) (int idx))
:cljs (.indexOf s c idx))))
(defn- split-keep-trailing-empty
"clojure.string/split is a wrapper on java.lang.String/split with the limit
parameter equal to 0, which keeps leading empty strings, but discards
trailing empty strings. This makes no sense, so we have to write our own
to keep the trailing empty strings."
[s re]
(str/split s re -1))
(defn tag-well-formed?
"Given a hiccup tag element, returns true iff the tag is in 'valid' hiccup
format. Which in this function means:
1. Tag name is non-empty.
2. If there is an id, there is only one.
3. If there is an id, it is nonempty.
4. If there is an id, it comes before any classes.
5. Any class name is nonempty."
[tag-elem]
(let [tag-elem (name tag-elem)
hash-idx (int (index-of tag-elem \#))
dot-idx (int (index-of tag-elem \.))
tag-cutoff (first-idx hash-idx dot-idx)]
(and (< 0 (count tag-elem)) ;; 1.
(if (== tag-cutoff -1) true (> tag-cutoff 0)) ;; 1.
(if (== hash-idx -1)
true
(and (== -1 (index-of tag-elem \# (inc hash-idx))) ;; 2.
(< (inc hash-idx) (first-idx (index-of tag-elem \. ;; 3.
(inc hash-idx))
(count tag-elem)))))
(if (and (not= hash-idx -1) (not= dot-idx -1)) ;; 4.
(< hash-idx dot-idx)
true)
(if (== dot-idx -1) ;; 5.
true
(let [classes (.substring tag-elem (inc dot-idx))]
(every? #(< 0 (count %))
(split-keep-trailing-empty classes #"\.")))))))
(defn tag-name
"Given a well-formed hiccup tag element, return just the tag name as
a string."
[tag-elem]
(let [tag-elem (name tag-elem)
hash-idx (int (index-of tag-elem \#))
dot-idx (int (index-of tag-elem \.))
cutoff (first-idx hash-idx dot-idx)]
(if (== cutoff -1)
;; No classes or ids, so the entire tag-element is the name.
tag-elem
;; There was a class or id, so the tag name ends at the first
;; of those.
(.substring tag-elem 0 cutoff))))
(defn class-names
"Given a well-formed hiccup tag element, return a vector containing
any class names included in the tag, as strings. Ignores the hiccup
requirement that any id on the tag must come
first. Example: :div.foo.bar => [\"foo\" \"bar\"]."
[tag-elem]
(let [tag-elem (name tag-elem)]
(loop [curr-dot (index-of tag-elem \.)
classes (transient [])]
(if (== curr-dot -1)
;; Didn't find another dot, so no more classes.
(persistent! classes)
;; There's another dot, so there's another class.
(let [next-dot (index-of tag-elem \. (inc curr-dot))
next-hash (index-of tag-elem \# (inc curr-dot))
cutoff (first-idx next-dot next-hash)]
(if (== cutoff -1)
;; Rest of the tag element is the last class.
(recur next-dot
(conj! classes (.substring tag-elem (inc curr-dot))))
;; The current class name is terminated by another element.
(recur next-dot
(conj! classes
(.substring tag-elem (inc curr-dot) cutoff)))))))))
(defn id
"Given a well-formed hiccup tag element, return a string containing
the id, or nil if there isn't one."
[tag-elem]
(let [tag-elem (name tag-elem)
hash-idx (int (index-of tag-elem \#))
next-dot-idx (int (index-of tag-elem \. hash-idx))]
(if (== hash-idx -1)
nil
(if (== next-dot-idx -1)
(.substring tag-elem (inc hash-idx))
(.substring tag-elem (inc hash-idx) next-dot-idx)))))
(defn- expand-content-seqs
"Given a sequence of hiccup forms, presumably the content forms of another
hiccup element, return a new sequence with any sequence elements expanded
into the main sequence. This logic does not apply recursively, so sequences
inside sequences won't be expanded out. Also note that this really only
applies to sequences; things that seq? returns true on. So this excludes
vectors.
(expand-content-seqs [1 '(2 3) (for [x [1 2 3]] (* x 2)) [5]])
==> (1 2 3 2 4 6 [5])"
[content]
(loop [remaining-content content
result (transient [])]
(if (nil? remaining-content)
(persistent! result)
(if (seq? (first remaining-content))
(recur (next remaining-content)
;; Fairly unhappy with this nested loop, but it seems
;; necessary to continue the handling of transient vector.
(loop [remaining-seq (first remaining-content)
result result]
(if (nil? remaining-seq)
result
(recur (next remaining-seq)
(conj! result (first remaining-seq))))))
(recur (next remaining-content)
(conj! result (first remaining-content)))))))
(defn- normalize-element
"Given a well-formed hiccup form, ensure that it is in the form
[tag attributes content1 ... contentN].
That is, an unadorned tag name (keyword, lowercase), all attributes in the
attribute map in the second element, and then any children. Note that this
does not happen recursively; content is not modified."
[hiccup-form]
(let [[tag-elem & content] hiccup-form]
(when (not (tag-well-formed? tag-elem))
(throw (ex-info (str "Invalid input: Tag element"
tag-elem "is not well-formed.")
{})))
(let [tag-name (keyword (str/lower-case (tag-name tag-elem)))
tag-classes (class-names tag-elem)
tag-id (id tag-elem)
tag-attrs {:id tag-id
:class (if (not (empty? tag-classes))
(str/join " " tag-classes))}
[map-attrs content] (if (map? (first content))
[(first content) (rest content)]
[nil content])
;; Note that we replace tag attributes with map attributes, without
;; merging them. This is to match hiccup's behavior.
attrs (merge tag-attrs map-attrs)]
(apply vector tag-name attrs content))))
(defn normalize-form
"Given a well-formed hiccup form, recursively normalizes it, so that it and
all children elements will also be normalized. A normalized form is in the
form
[tag attributes content1 ... contentN].
That is, an unadorned tag name (keyword, lowercase), all attributes in the
attribute map in the second element, and then any children. Any content
that is a sequence is also expanded out into the main sequence of content
items."
[form]
(if (string? form)
form
;; Do a pre-order walk and save the first two items, then do the children,
;; then glue them back together.
(let [[tag attrs & contents] (normalize-element form)]
(apply vector tag attrs (map #(if (vector? %)
;; Recurse only on vec children.
(normalize-form %)
%)
(expand-content-seqs contents))))))
================================================
FILE: src/cljc/hickory/render.cljc
================================================
(ns hickory.render
(:require [hickory.hiccup-utils :as hu]
[hickory.utils :as utils]
[clojure.string :as str]))
;;
;; Hickory to HTML
;;
(defn- render-hickory-attribute
"Given a map entry m, representing the attribute name and value, returns a
string representing that key/value pair as it would be rendered into HTML."
[m]
(str " " (name (key m)) "=\"" (utils/html-escape (val m)) "\""))
(defn hickory-to-html
"Given a hickory HTML DOM map structure (as returned by as-hickory), returns a
string containing HTML it represents. Keep in mind this function is not super
fast or heavy-duty.
Note that it will NOT in general be the case that
(= my-html-src (hickory-to-html (as-hickory (parse my-html-src))))
as we do not keep any letter case or whitespace information, any
\"tag-soupy\" elements, attribute quote characters used, etc."
[dom]
(if (string? dom)
(utils/html-escape dom)
(try
(case (:type dom)
:document
(apply str (map hickory-to-html (:content dom)))
:document-type
(utils/render-doctype (get-in dom [:attrs :name])
(get-in dom [:attrs :publicid])
(get-in dom [:attrs :systemid]))
:element
(cond
(utils/void-element (:tag dom))
(str "<" (name (:tag dom))
(apply str (map render-hickory-attribute (:attrs dom)))
">")
(utils/unescapable-content (:tag dom))
(str "<" (name (:tag dom))
(apply str (map render-hickory-attribute (:attrs dom)))
">"
(apply str (:content dom)) ;; Won't get html-escaped.
"</" (name (:tag dom)) ">")
:else
(str "<" (name (:tag dom))
(apply str (map render-hickory-attribute (:attrs dom)))
">"
(apply str (map hickory-to-html (:content dom)))
"</" (name (:tag dom)) ">"))
:comment
(str "<!--" (apply str (:content dom)) "-->"))
(catch #?(:clj IllegalArgumentException
:cljs js/Error) e
(throw
(if (utils/starts-with #?(:clj (.getMessage e) :cljs (.-message e)) "No matching clause: ")
(ex-info (str "Not a valid node: " (pr-str dom)) {:dom dom})
e))))))
;;
;; Hiccup to HTML
;;
(defn- render-hiccup-attrs
"Given a hiccup attribute map, returns a string containing the attributes
rendered as they should appear in an HTML tag, right after the tag (including
a leading space to separate from the tag, if any attributes present)."
[attrs]
;; Hiccup normally does not html-escape strings, but it does for attribute
;; values.
(let [attrs-str (->> (for [[k v] attrs]
(cond (true? v)
(str (name k))
(nil? v)
""
:else
(str (name k) "=" "\"" (utils/html-escape v) "\"")))
(filter #(not (empty? %)))
sort
(str/join " "))]
(if (not (empty? attrs-str))
;; If the attrs-str is not "", we need to pad the front so that the
;; tag will separate from the attributes. Otherwise, "" is fine to return.
(str " " attrs-str)
attrs-str)))
(declare hiccup-to-html)
(defn- render-hiccup-element
"Given a normalized hiccup element (such as the output of
hickory.hiccup-utils/normalize-form; see this function's docstring
for more detailed definition of a normalized hiccup element), renders
it to HTML and returns it as a string."
[n-element]
(let [[tag attrs & content] n-element]
(if (utils/void-element tag)
(str "<" (name tag) (render-hiccup-attrs attrs) ">")
(str "<" (name tag) (render-hiccup-attrs attrs) ">"
(hiccup-to-html content)
"</" (name tag) ">"))))
(defn- render-hiccup-form
"Given a normalized hiccup form (such as the output of
hickory.hiccup-utils/normalize-form; see this function's docstring
for more detailed definition of a normalized hiccup form), renders
it to HTML and returns it as a string."
[n-form]
(if (vector? n-form)
(render-hiccup-element n-form)
n-form))
(defn hiccup-to-html
"Given a sequence of hiccup forms (as returned by as-hiccup), returns a
string containing HTML it represents. Keep in mind this function is not super
fast or heavy-duty, and definitely not a replacement for dedicated hiccup
renderers, like hiccup itself, which *is* fast and heavy-duty.
```klipse
(hiccup-to-html '([:html {} [:head {}] [:body {} [:a {} \"foo\"]]]))
```
Note that it will NOT in general be the case that
(= my-html-src (hiccup-to-html (as-hiccup (parse my-html-src))))
as we do not keep any letter case or whitespace information, any
\"tag-soupy\" elements, attribute quote characters used, etc. It will also
not generally be the case that this function's output will exactly match
hiccup's.
For instance:
```klipse
(hiccup-to-html (as-hiccup (parse \"<A href=\\\"foo\\\">foo</A>\")))
```
"
[hiccup-forms]
(apply str (map #(render-hiccup-form (hu/normalize-form %)) hiccup-forms)))
================================================
FILE: src/cljc/hickory/select.cljc
================================================
(ns hickory.select
"Functions to query hickory-format HTML data.
See clojure.zip for more information on zippers, locs, nodes, next, etc."
(:require [clojure.zip :as zip]
[clojure.string :as string]
[hickory.zip :as hzip])
#?(:clj
(:import clojure.lang.IFn))
(:refer-clojure :exclude [and or not class]))
;;
;; Utilities
;;
(defn until
"Calls f on val until pred called on the result is true. If not, it
repeats by calling f on the result, etc. The value that made pred
return true is returned."
[f val pred]
(let [next-val (f val)]
(if (pred next-val)
next-val
(recur f next-val pred))))
(defn count-until
"Calls f on val until pred called on the result is true. If not, it
repeats by calling f on the result, etc. The count of times this
process was repeated until pred returned true is returned."
[f val pred]
(loop [next-val val
cnt 0]
(if (pred next-val)
cnt
(recur (f next-val) (inc cnt)))))
(defn next-pred
"Like clojure.zip/next, but moves until it reaches a node that returns
true when the function in the pred argument is called on them, or reaches
the end."
[hzip-loc pred]
(until zip/next hzip-loc #(clojure.core/or (zip/end? %)
(pred %))))
(defn prev-pred
"Like clojure.zip/prev, but moves until it reaches a node that returns
true when the function in the pred argument is called on them, or reaches
the beginning."
[hzip-loc pred]
(until zip/prev hzip-loc #(clojure.core/or (nil? %)
(pred %))))
(defn left-pred
"Like clojure.zip/left, but moves until it reaches a node that returns
true when the function in the pred argument is called on them, or reaches
the left boundary of the current group of siblings."
[hzip-loc pred]
(until zip/left hzip-loc #(clojure.core/or (nil? %)
(pred %))))
(defn right-pred
"Like clojure.zip/right, but moves until it reaches a node that returns
true when the function in the pred argument is called on them, or reaches
the right boundary of the current group of siblings."
[hzip-loc pred]
(until zip/right hzip-loc #(clojure.core/or (nil? %)
(pred %))))
(defn up-pred
"Like clojure.zip/up, but moves until it reaches a node that returns
true when the function in the pred argument is called on them, or reaches
the beginning."
[hzip-loc pred]
(until zip/up hzip-loc #(clojure.core/or (nil? %)
(pred %))))
(defn next-of-node-type
"Like clojure.zip/next, but only counts moves to nodes that have
the given type."
[hzip-loc node-type]
(next-pred hzip-loc #(= node-type (:type (zip/node %)))))
(defn prev-of-node-type
"Like clojure.zip/prev, but only counts moves to nodes that have
the given type."
[hzip-loc node-type]
(prev-pred hzip-loc #(= node-type (:type (zip/node %)))))
(defn left-of-node-type
"Like clojure.zip/left, but only counts moves to nodes that have
the given type."
[hzip-loc node-type]
(left-pred hzip-loc #(= node-type (:type (zip/node %)))))
(defn right-of-node-type
"Like clojure.zip/right, but only counts moves to nodes that have
the given type."
[hzip-loc node-type]
(right-pred hzip-loc #(= node-type (:type (zip/node %)))))
(defn after-subtree
"Given a zipper loc, returns the zipper loc that is the first one after
the arg's subtree, if there is a subtree. If there is no loc after this
loc's subtree, returns the end node."
[zip-loc]
(if (zip/end? zip-loc)
zip-loc
(clojure.core/or (zip/right zip-loc)
(loop [curr-loc zip-loc]
(if (zip/up curr-loc)
(clojure.core/or (zip/right (zip/up curr-loc))
(recur (zip/up curr-loc)))
[(zip/node curr-loc) :end])))))
;;
;; Select
;;
(defn select-next-loc
"Given a selector function and a loc inside a hickory zip data structure,
returns the next zipper loc that satisfies the selection function. This can
be the loc that is passed in, so be sure to move to the next loc if you
want to use this function to exhaustively search through a tree manually.
Note that if there is no next node that satisfies the selection function, nil
is returned.
The third argument, if present, must be a function of one argument that is
called on a zipper loc to return the next loc to consider in the search. By
default, this argument is zip/next. The fourth argument, if present, must be
a function of one argument that is called on a zipper loc to determine if
the end of the search has been reached (true return value). When the fourth
argument returns true on a loc, that loc is not considered in the search and
the search finishes with a nil return. By default, the fourth argument is
zip/end?."
([selector-fn hzip-loc]
(select-next-loc selector-fn hzip-loc zip/next))
([selector-fn hzip-loc next-fn]
(select-next-loc selector-fn hzip-loc next-fn zip/end?))
([selector-fn hzip-loc next-fn end?-fn]
(loop [loc hzip-loc]
(if (end?-fn loc)
nil
(if (selector-fn loc)
loc
(recur (next-fn loc)))))))
(defn select-locs
"Given a selector function and a hickory data structure, returns a vector
containing all of the zipper locs selected by the selector function."
[selector-fn hickory-tree]
(loop [loc (select-next-loc selector-fn
(hzip/hickory-zip hickory-tree))
selected-nodes (transient [])]
(if (nil? loc)
(persistent! selected-nodes)
(recur (select-next-loc selector-fn (zip/next loc))
(conj! selected-nodes loc)))))
(defn select
"Given a selector function and a hickory data structure, returns a vector
containing all of the hickory nodes selected by the selector function."
[selector-fn hickory-tree]
(mapv zip/node (select-locs selector-fn hickory-tree)))
;;
;; Selectors
;;
;; Mostly based off the spec at http://www.w3.org/TR/selectors/#selectors
;; Some selectors are simply not possible outside a browser (active,
;; visited, etc).
;;
(defn node-type
"Return a function that takes a zip-loc argument and returns the
zip-loc passed in iff it has the given node type. The type
argument can be a String or Named (keyword, symbol). The node type
comparison is done case-insensitively."
[type]
(fn [hzip-loc]
(let [node (zip/node hzip-loc)
node-type (-> node :type)]
(if (clojure.core/and node-type
(= (string/lower-case (name node-type))
(string/lower-case (name type))))
hzip-loc))))
(defn tag
"Return a function that takes a zip-loc argument and returns the
zip-loc passed in iff it has the given tag. The tag argument can be
a String or Named (keyword, symbol). The tag name comparison
is done case-insensitively."
[tag]
(fn [hzip-loc]
(let [node (zip/node hzip-loc)
node-tag (-> node :tag)]
(if (clojure.core/and node-tag
(= (string/lower-case (name node-tag))
(string/lower-case (name tag))))
hzip-loc))))
(defn attr
"Returns a function that takes a zip-loc argument and returns the
zip-loc passed in iff it has the given attribute, and that attribute
optionally satisfies a predicate given as an additional argument. With
a single argument, the attribute name (a string, keyword, or symbol),
the function returned will return the zip-loc if that attribute is
present (and has any value) on the zip-loc's node. The attribute name
will be compared case-insensitively, but the attribute value (if present),
will be passed as-is to the predicate.
If the predicate argument is given, it will only return the zip-loc if
that predicate is satisfied when given the attribute's value as its only
argument. Note that the predicate only gets called when the attribute is
present, so it can assume its argument is not nil."
([attr-name]
;; Since we want this call to succeed in any case where this attr
;; is present, we pass in a function that always returns true.
(attr attr-name (fn [_] true)))
([attr-name predicate]
;; Note that attribute names are normalized to lowercase by
;; jsoup, as an html5 parser should; see here:
;; http://www.whatwg.org/specs/web-apps/current-work/#attribute-name-state
(fn [hzip-loc]
(let [node (zip/node hzip-loc)
attr-key (keyword (string/lower-case (name attr-name)))]
;; If the attribute does not exist, we'll definitely return null.
;; Otherwise, we'll ask the predicate if we should return hzip-loc.
(if (clojure.core/and (contains? (:attrs node) attr-key)
(predicate (get-in node [:attrs attr-key])))
hzip-loc)))))
(defn id
"Returns a function that takes a zip-loc argument and returns the
zip-loc passed in iff it has the given id. The id argument can be
a String or Named (keyword, symbol). The id name comparison
is done case-insensitively."
[id]
(attr :id #(= (string/lower-case %)
(string/lower-case (name id)))))
(defn class
"Returns a function that takes a zip-loc argument and returns the
zip-loc passed in iff it has the given class. The class argument can
be a String or Named (keyword, symbol). The class name comparison
is done case-insensitively."
[class-name]
(letfn [(parse-classes [class-str]
(into #{} (mapv string/lower-case
(string/split class-str #"\s+"))))]
(attr :class #(contains? (parse-classes %)
(string/lower-case (name class-name))))))
(defn any
"This selector takes no args, it simply is the selector function. It returns
true on any element it is called on; corresponds to the CSS '*' selector."
[hzip-loc]
(if (= :element (-> (zip/node hzip-loc) :type))
hzip-loc))
(def element
"Another name for the any selector, to express that it can be used to only
select elements."
any)
(defn element-child
"This selector takes no args, it simply is the selector function. It returns
the zip-loc passed in iff that loc is an element, and it has a parent
that is also an element."
[hzip-loc]
(let [possible-parent (zip/up hzip-loc)]
(clojure.core/and (element hzip-loc)
;; Check that we are not at the top already first.
possible-parent
(element possible-parent))))
(defn root
"This selector takes no args, it simply is the selector function. It returns
the zip-loc of the root node (the HTML element)."
[hzip-loc]
(if (= :html (-> (zip/node hzip-loc) :tag))
hzip-loc))
(defn find-in-text
"Returns a function that takes a zip-loc argument and returns the zip-loc
passed in iff it has some text node in its contents that matches the regular
expression. Note that this only applies to the direct text content of a node;
nodes which have the given text in one of their child nodes will not be
selected."
[re]
(fn [hzip-loc]
(when (some #(re-find re %)
(->> (zip/node hzip-loc)
:content
(filter string?)))
hzip-loc)))
(defn n-moves-until
"This selector returns a selector function that selects its argument if
that argument is some \"distance\" from a \"boundary.\" This is an abstract
way of phrasing it, but it captures the full generality.
The selector this function returns will apply the move argument to its own
output, beginning with its zipper loc argument, until the term-pred argument
called on its output returns true. At that point, the number of times the
move function was called successfully is compared to kn+c; if there exists
some value of k such that the two quantities are equal, then the selector
will return the argument zipper loc successfully.
For example, (n-moves-until 2 1 clojure.zip/left nil?) will return a selector
that calls zip/left on its own output, beginning with the argument zipper
loc, until its return value is nil (nil? returns true). Suppose it called
left 5 times before zip/left returned nil. Then the selector will return
with success, since 2k+1 = 5 for k = 2.
Most nth-child-* selectors in this package use n-moves-until in their
implementation."
[n c move term-pred]
(fn [hzip-loc]
(let [distance (count-until move hzip-loc term-pred)]
(if (== 0 n)
;; No stride, so distance must = c to select.
(if (== distance c)
hzip-loc)
;; There's a stride, so need to subtract c and see if the
;; remaining distance is a multiple of n.
(if (== 0 (rem (- distance c) n))
hzip-loc)))))
(defn nth-of-type
"Returns a function that returns true if the node is the nth child of
its parent (and it has a parent) of the given tag type. First element is 1,
last is n."
([c typ]
(cond (= :odd c)
(nth-of-type 2 1 typ)
(= :even c)
(nth-of-type 2 0 typ)
:else
(nth-of-type 0 c typ)))
([n c typ]
(fn [hzip-loc]
;; We're only interested in elements whose parents are also elements,
;; so check this up front and maybe save some work.
(if (clojure.core/and (element-child hzip-loc)
(= typ (:tag (zip/node hzip-loc))))
(let [sel (n-moves-until n c
#(left-pred % (fn [x] (-> (zip/node x)
:tag
(= typ))))
nil?)]
(sel hzip-loc))))))
(defn nth-last-of-type
"Returns a function that returns true if the node is the nth last child of
its parent (and it has a parent) of the given tag type. First element is 1,
last is n."
([c typ]
(cond (= :odd c)
(nth-last-of-type 2 1 typ)
(= :even c)
(nth-last-of-type 2 0 typ)
:else
(nth-last-of-type 0 c typ)))
([n c typ]
(fn [hzip-loc]
;; We're only interested in elements whose parents are also elements,
;; so check this up front and maybe save some work.
(if (clojure.core/and (element-child hzip-loc)
(= typ (:tag (zip/node hzip-loc))))
(let [sel (n-moves-until n c
#(right-pred % (fn [x] (-> (zip/node x)
:tag
(= typ))))
nil?)]
(sel hzip-loc))))))
(defn nth-child
"Returns a function that returns true if the node is the nth child of
its parent (and it has a parent). First element is 1, last is n."
([c]
(cond (= :odd c)
(nth-child 2 1)
(= :even c)
(nth-child 2 0)
:else
(nth-child 0 c)))
([n c]
(fn [hzip-loc]
;; We're only interested in elements whose parents are also elements,
;; so check this up front and maybe save some work.
(if (element-child hzip-loc)
(let [sel (n-moves-until n c #(left-of-node-type % :element) nil?)]
(sel hzip-loc))))))
(defn nth-last-child
"Returns a function that returns true if the node has n siblings after it,
and has a parent."
([c]
(cond (= :odd c)
(nth-last-child 2 1)
(= :even c)
(nth-last-child 2 0)
:else
(nth-last-child 0 c)))
([n c]
(fn [hzip-loc]
;; We're only interested in elements whose parents are also elements,
;; so check this up front and maybe save some work.
(if (element-child hzip-loc)
(let [sel (n-moves-until n c #(right-of-node-type % :element) nil?)]
(sel hzip-loc))))))
(defn first-child
"This selector takes no args, it is simply the selector. Returns
true if the node is the first child of its parent (and it has a
parent)."
[hzip-loc]
(clojure.core/and (element-child hzip-loc)
((nth-child 1) hzip-loc)))
(defn last-child
"This selector takes no args, it is simply the selector. Returns
true if the node is the last child of its parent (and it has a
parent."
[hzip-loc]
(clojure.core/and (element-child hzip-loc)
((nth-last-child 1) hzip-loc)))
;;
;; Selector combinators
;;
(defn and
"Takes any number of selectors and returns a selector that is true if
all of the argument selectors are true."
[& selectors]
(fn [zip-loc]
(if (every? #(% zip-loc) selectors)
zip-loc)))
(defn or
"Takes any number of selectors and returns a selector that is true if
any of the argument selectors are true."
[& selectors]
(fn [zip-loc]
(if (some #(% zip-loc) selectors)
zip-loc)))
(defn not
"Takes a selector argument and returns a selector that is true if
the underlying selector is false on its argument, and vice versa."
[selector]
(fn [hzip-loc]
(if (clojure.core/not (selector hzip-loc))
hzip-loc)))
(defn el-not
"Takes a selector argument and returns a selector that is true if
the underlying selector is false on its argument and vice versa, and
additionally that argument is an element node. Compared to the 'not'
selector, this corresponds more closely to the CSS equivalent, which
will only ever select elements."
[selector]
(and (node-type :element)
(not selector)))
(defn compose-unary
"Takes a unary selection function and any number of selectors and returns
a selector which returns true when each selector and the unary function
applied to each subsequenct selector returns true.
Example:
(compose-unary has-child (tag :div) (class :foo) (attr :disabled))
Produces the equivalent of:
(and (tag :div)
(has-child (and (class :foo)
(has-child (and (attr :disabled))))))"
[unary-selector-fn & selectors]
(let [rev (reverse selectors)]
(loop [selectors (rest rev)
output (and (first rev))]
(cond
(empty? selectors) output
(= (count selectors) 1) (and (first selectors) (unary-selector-fn output))
:else (recur (rest selectors)
(and (first selectors) (unary-selector-fn output)))))))
(defn ordered-adjacent
"Takes a zipper movement function and any number of selectors as arguments
and returns a selector that returns true when the zip-loc given as the
argument is satisfied by the first selector, and the zip-loc arrived at by
applying the move-fn argument is satisfied by the second selector, and so
on for all the selectors given as arguments. If the move-fn
moves to nil before the full selector list is satisfied, the entire
selector fails, but note that success is checked before a move to nil is
checked, so satisfying the last selector with the last node you can move
to succeeds."
[move-fn & selectors]
;; We'll work backwards through the selector list with an index. First we'll
;; build the selector list into an array for quicker access. We'll do it
;; immediately and then closure-capture the result, so it does not get
;; redone every time the selector is called.
(let [selectors (into-array IFn selectors)]
(fn [hzip-loc]
(loop [curr-loc hzip-loc
idx 0]
(cond (>= idx (count selectors))
hzip-loc ;; Got to end satisfying selectors, return the loc.
(nil? curr-loc)
nil ;; Ran off a boundary before satisfying selectors, return nil.
:else
(if-let [next-loc ((nth selectors idx) curr-loc)]
(recur (move-fn next-loc)
(inc idx))))))))
(defn child
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the end of
a chain of direct child relationships specified by the selectors given as
arguments.
Example: (child (tag :div) (class :foo) (attr :disabled))
will select the input in
<div><span class=\"foo\"><input disabled></input></span></div>
but not in
<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
[& selectors]
(apply ordered-adjacent zip/up (reverse selectors)))
(defn has-child
"Takes a selector as argument and returns a selector that returns true
when some direct child node of the zip-loc given as the argument satisfies
the selector.
Example: (has-child (tag :div))
will select only the inner span in
<div><span><div></div></span></div>"
[selector]
(fn [hzip-loc]
(let [subtree-start-loc (-> hzip-loc zip/down)
has-children? (not= nil subtree-start-loc)]
;; has-children? is needed to guard against zip/* receiving a nil arg in
;; a selector.
(if has-children?
(if (select-next-loc selector subtree-start-loc
zip/right
#(nil? %))
hzip-loc)))))
(defn parent
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the start of
a chain of direct child relationships specified by the selectors given
as arguments.
Example: (parent (tag :div) (class :foo) (attr :disabled))
will select the div in
<div><span class=\"foo\"><input disabled></input></span></div>
but not in
<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
[& selectors]
(apply compose-unary has-child selectors))
(defn follow-adjacent
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the end of
a chain of direct element sibling relationships specified by the selectors
given as arguments.
Example: (follow-adjacent (tag :div) (class :foo))
will select the span in
<div>...</div><span class=\"foo\">...</span>
but not in
<div>...</div><b>...</b><span class=\"foo\">...</span>"
[& selectors]
(apply ordered-adjacent
#(left-of-node-type % :element)
(reverse selectors)))
(defn precede-adjacent
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the beginning of
a chain of direct element sibling relationships specified by the selectors
given as arguments.
Example: (precede-adjacent (tag :div) (class :foo))
will select the div in
<div>...</div><span class=\"foo\">...</span>
but not in
<div>...</div><b>...</b><span class=\"foo\">...</span>"
[& selectors]
(apply ordered-adjacent
#(right-of-node-type % :element)
selectors))
(defn ordered
"Takes a zipper movement function and any number of selectors as arguments
and returns a selector that returns true when the zip-loc given as the
argument is satisfied by the first selector, and some zip-loc arrived at by
applying the move-fn argument *one or more times* is satisfied by the second
selector, and so on for all the selectors given as arguments. If the move-fn
moves to nil before a the full selector list is satisfied, the entire
selector fails, but note that success is checked before a move to nil is
checked, so satisfying the last selector with the last node you can move
to succeeds."
[move-fn & selectors]
;; This function is a lot like ordered-adjacent, above, but:
;; 1) failing to fulfill a selector does not stop us moving along the tree
;; 2) therefore, we need to make sure the first selector matches the loc under
;; consideration, and not merely one that is farther along the movement
;; direction.
(let [selectors (into-array IFn selectors)]
(fn [hzip-loc]
;; First need to check that the first selector matches the current loc,
;; or else we can return nil immediately.
(let [fst-selector (nth selectors 0)]
(if (fst-selector hzip-loc)
;; First selector matches this node, so now check along the
;; movement direction for the rest of the selectors.
(loop [curr-loc (move-fn hzip-loc)
idx 1]
(cond (>= idx (count selectors))
hzip-loc ;; Satisfied all selectors, so return the orig. loc.
(nil? curr-loc)
nil ;; Ran out of movements before selectors, return nil.
:else
(if ((nth selectors idx) curr-loc)
(recur (move-fn curr-loc)
(inc idx))
;; Failed, so move but retry the same selector
(recur (move-fn curr-loc) idx)))))))))
(defn descendant
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the end of
a chain of descendant relationships specified by the
selectors given as arguments. To be clear, the node selected matches
the final selector, but the previous selectors can match anywhere in
the node's ancestry, provided they match in the order they are given
as arguments, from top to bottom.
Example: (descendant (tag :div) (class :foo) (attr :disabled))
will select the input in both
<div><span class=\"foo\"><input disabled></input></span></div>
and
<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
[& selectors]
(apply ordered zip/up (reverse selectors)))
(defn has-descendant
"Takes a selector as argument and returns a selector that returns true
when some descendant node of the zip-loc given as the argument satisfies
the selector.
Be aware that because this selector must do a full sub-tree search on
each node examined, it can have terrible performance. It's helpful if this is
a late clause in an `and`, to prevent it from even attempting to match
unless other criteria have been met first.
Example: (has-descendant (tag :div))
will select the span and the outer div, but not the inner div, in
<span><div><div></div></div></span>"
[selector]
(fn [hzip-loc]
;; Want to not count the current node, and stop after the last node
;; in the subtree of it has been checked, which is the next node
;; after the rightmost child.
(let [subtree-start-loc (-> hzip-loc zip/down)
has-children? (not= nil subtree-start-loc)]
;; has-children? is needed to guard against zip/* receiving a nil arg in
;; a selector.
(if has-children?
(let [subtree-end-loc (after-subtree hzip-loc)]
(if (select-next-loc selector subtree-start-loc
zip/next
#(= % subtree-end-loc))
hzip-loc))))))
(defn ancestor
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the start of
a chain of descendant relationships specified by the selectors given
as arguments; intervening elements that do not satisfy a selector are
simply ignored and do not prevent a match.
Example: (ancestor (tag :div) (class :foo) (attr :disabled))
will select the div in both
<div><span class=\"foo\"><input disabled></input></span></div>
and
<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
[& selectors]
(apply compose-unary has-descendant selectors))
(defn follow
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the end of
a chain of element sibling relationships specified by the selectors
given as arguments; intervening elements that do not satisfy a selector
are simply ignored and do not prevent a match.
Example: (follow (tag :div) (class :foo))
will select the span in both
<div>...</div><span class=\"foo\">...</span>
and
<div>...</div><b>...</b><span class=\"foo\">...</span>"
[& selectors]
(apply ordered #(left-of-node-type % :element) (reverse selectors)))
(defn precede
"Takes any number of selectors as arguments and returns a selector that
returns true when the zip-loc given as the argument is at the beginning of
a chain of element sibling relationships specified by the selectors
given as arguments; intervening elements that do not satisfy a selector
are simply ignored and do not prevent a match.
Example: (precede (tag :div) (class :foo))
will select the div in both
<div>...</div><span class=\"foo\">...</span>
and
<div>...</div><b>...</b><span class=\"foo\">...</span>"
[& selectors]
(apply ordered #(right-of-node-type % :element) selectors))
================================================
FILE: src/cljc/hickory/utils.cljc
================================================
(ns hickory.utils
"Miscellaneous utilities used internally."
(:require [clojure.string :as string]
#?(:cljs [goog.string :as gstring])))
;;
;; Data
;;
(def void-element
"Elements that don't have a meaningful <tag></tag> form."
#{:area :base :br :col :command :embed :hr :img :input :keygen :link :meta
:param :source :track :wbr})
(def unescapable-content
"Elements whose content should never have html-escape codes."
#{:script :style})
;;
;; String utils
;;
(defn clj-html-escape-without-quoin
"Actually copy pasted from quoin: https://github.com/davidsantiago/quoin/blob/develop/src/quoin/text.clj"
[^String s]
;; This method is "Java in Clojure" for serious speedups.
#?(:clj (let [sb (StringBuilder.)
slength (long (count s))]
(loop [idx (long 0)]
(if (>= idx slength)
(.toString sb)
(let [c (char (.charAt s idx))]
(case c
\& (.append sb "&")
\< (.append sb "<")
\> (.append sb ">")
\" (.append sb """)
(.append sb c))
(recur (inc idx))))))
;; This shouldn't be called directly in cljs, but if it is, we use the same implementation as the html-escape function
:cljs (gstring/htmlEscape s)))
(defn html-escape
[s]
#?(:clj (clj-html-escape-without-quoin s)
:cljs (gstring/htmlEscape s)))
(defn starts-with
[^String s ^String prefix]
#?(:clj (.startsWith s prefix)
:cljs (goog.string.startsWith s prefix)))
(defn lower-case-keyword
"Converts its string argument into a lowercase keyword."
[s]
(-> s string/lower-case keyword))
(defn render-doctype
"Returns a string containing the HTML source for the doctype with given args.
The second and third arguments can be nil or empty strings."
[name publicid systemid]
(str "<!DOCTYPE " name
(when (not-empty publicid)
(str " PUBLIC \"" publicid "\""))
(when (not-empty systemid)
(str " \"" systemid "\""))
">"))
================================================
FILE: src/cljc/hickory/zip.cljc
================================================
(ns hickory.zip
(:require [clojure.zip :as zip]))
;;
;; Hickory
;;
(defn hickory-zip
"Returns a zipper for html dom maps (as from as-hickory),
given a root element."
[root]
(zip/zipper (complement string?)
(comp seq :content)
(fn [node children]
(assoc node :content (and children (apply vector children))))
root))
;;
;; Hiccup
;;
;; Just to make things easier, we go ahead and do the work here to
;; make hiccup zippers work on both normalized (all items have tag,
;; attrs map, and any children) and unnormalized hiccup forms.
(defn- children
"Takes a hiccup node (normalized or not) and returns its children nodes."
[node]
(if (vector? node)
;; It's a hiccup node vector.
(if (map? (second node)) ;; There is an attr map in second slot.
(seq (subvec node 2)) ;; So skip tag and attr vec.
(seq (subvec node 1))) ;; Otherwise, just skip tag.
;; Otherwise, must have a been a node list
node))
;; Note, it's not made clear at all in the docs for clojure.zip, but as far as
;; I can tell, you are given a node potentially with existing children and
;; the sequence of children that should totally replace the existing children.
(defn- make
"Takes a hiccup node (normalized or not) and a sequence of children nodes,
and returns a new node that has the the children argument as its children."
[node children]
;; The node might be either a vector (hiccup form) or a seq (which is like a
;; node-list).
(if (vector? node)
(if (map? (second node)) ;; Again, check for normalized vec.
(into (subvec node 0 2) children) ;; Attach children after tag&attrs.
(apply vector (first node) children)) ;; Otherwise, attach after tag.
children)) ;; We were given a list for node, so just return the new list.
(defn hiccup-zip
"Returns a zipper for Hiccup forms, given a root form."
[root]
(zip/zipper sequential?
children
make
root))
================================================
FILE: src/cljs/hickory/core.cljs
================================================
(ns hickory.core
(:require [hickory.utils :as utils]
[clojure.zip :as zip]
[goog.string :as gstring]
[goog.dom]
[goog.dom.NodeType]))
;;
;; Protocols
;;
(defprotocol HiccupRepresentable
"Objects that can be represented as Hiccup nodes implement this protocol in
order to make the conversion."
(as-hiccup [this]
"Converts the node given into a hiccup-format data structure. The
node must have an implementation of the HiccupRepresentable
protocol; nodes created by parse or parse-fragment already do."))
(defprotocol HickoryRepresentable
"Objects that can be represented as HTML DOM node maps, similar to
clojure.xml, implement this protocol to make the conversion.
Each DOM node will be a map or string (for Text/CDATASections). Nodes that
are maps have the appropriate subset of the keys
:type - [:comment, :document, :document-type, :element]
:tag - node's tag, check :type to see if applicable
:attrs - node's attributes as a map, check :type to see if applicable
:content - node's child nodes, in a vector, check :type to see if
applicable"
(as-hickory [this]
"Converts the node given into a hickory-format data structure. The
node must have an implementation of the HickoryRepresentable protocol;
nodes created by parse or parse-fragment already do."))
(defn node-type [type]
(case type
"ELEMENT" 1
"ATTRIBUTE" 2
"TEXT" 3
"CDATA_SECTION" 4
"ENTITY_REFERENCE" 5
"ENTITY" 6
"PROCESSING_INSTRUCTION" 7
"COMMENT" 8
"DOCUMENT" 9
"DOCUMENT_TYPE" 10
"DOCUMENT_FRAGMENT" 11
"NOTATION" 12))
(def Attribute (node-type "ATTRIBUTE"))
(def Comment (node-type "COMMENT"))
(def Document (node-type "DOCUMENT"))
(def DocumentType (node-type "DOCUMENT_TYPE"))
(def Element (node-type "ELEMENT"))
(def Text (node-type "TEXT"))
(defn- as-seq [nodelist]
(if (seq? nodelist) nodelist (array-seq nodelist)))
(defn format-doctype
[dt]
(let [name (.-name dt)
publicId (.-publicId dt)
systemId (.-systemId dt)]
(if (seq publicId)
(gstring/format "<!DOCTYPE %s PUBLIC \"%s\" \"%s\">" name publicId systemId)
(str "<!DOCTYPE " name ">"))))
(extend-protocol HiccupRepresentable
object
(as-hiccup [this] (condp = (.-nodeType this)
Attribute [(utils/lower-case-keyword (.-name this))
(.-value this)]
Comment (str "<!--" (.-data this) "-->")
Document (map as-hiccup (as-seq (.-childNodes this)))
DocumentType (format-doctype this)
;; There is an issue with the hiccup format, which is that it
;; can't quite cover all the pieces of HTML, so anything it
;; doesn't cover is thrown into a string containing the raw
;; HTML. This presents a problem because it is then never the case
;; that a string in a hiccup form should be html-escaped (except
;; in an attribute value) when rendering; it should already have
;; any escaping. Since the HTML parser quite properly un-escapes
;; HTML where it should, we have to go back and un-un-escape it
;; wherever text would have been un-escaped. We do this by
;; html-escaping the parsed contents of text nodes, and not
;; html-escaping comments, data-nodes, and the contents of
;; unescapable nodes.
Element (let [tag (utils/lower-case-keyword (.-tagName this))]
(into [] (concat [tag
(into {} (map as-hiccup (as-seq (.-attributes this))))]
(if (utils/unescapable-content tag)
(map goog.dom.getRawTextContent (as-seq (.-childNodes this)))
(map as-hiccup (as-seq (.-childNodes this)))))))
Text (utils/html-escape (goog.dom.getRawTextContent this)))))
(extend-protocol HickoryRepresentable
object
(as-hickory [this] (condp = (.-nodeType this)
Attribute [(utils/lower-case-keyword (.-name this)) (.-value this)]
Comment {:type :comment
:content [(.-data this)]}
Document {:type :document
:content (not-empty
(into [] (map as-hickory
(as-seq (.-childNodes this)))))}
DocumentType {:type :document-type
:attrs {:name (.-name this)
:publicid (.-publicId this)
:systemid (.-systemId this)}}
Element {:type :element
:attrs (not-empty (into {} (map as-hickory (as-seq (.-attributes this)))))
:tag (utils/lower-case-keyword (.-tagName this))
:content (not-empty
(into [] (map as-hickory
(as-seq (.-childNodes this)))))}
Text (goog.dom.getRawTextContent this))))
(defn extract-doctype
[s]
;;Starting HTML5 doctype definition can be uppercase
(when-let [doctype (second (or (re-find #"<!DOCTYPE ([^>]*)>" s)
(re-find #"<!doctype ([^>]*)>" s)))]
(re-find #"([^\s]*)(\s+PUBLIC\s+[\"]?([^\"]*)[\"]?\s+[\"]?([^\"]*)[\"]?)?" doctype)))
(defn remove-el
[el]
(.removeChild (.-parentNode el) el))
(defn parse-dom-with-domparser
[s]
(when (exists? js/DOMParser)
(.parseFromString (js/DOMParser.) s "text/html")))
(defn parse-dom-with-write
"Parse an HTML document (or fragment) as a DOM using document.implementation.createHTMLDocument and document.write."
[s]
;;See http://www.w3.org/TR/domcore/#dom-domimplementation-createhtmldocument for more details.
(let [doc (.createHTMLDocument js/document.implementation "") ;;empty title for older implementation
doctype-el (.-doctype doc)]
(when-not (extract-doctype s);; Remove default doctype if parsed string does not define it.
(remove-el doctype-el))
(when-let [title-el (-> doc .-head .-firstChild)];; Remove default title if parsed string does not define it.
(when (empty? (.-text title-el))
(remove-el title-el)))
(.write doc s)
doc))
(defn parse
"Parse an entire HTML document into a DOM structure that can be
used as input to as-hiccup or as-hickory.
```klipse
(-> (parse \"<a style=\\\"visibility:hidden\\\">foo</a><div style=\\\"color:green\\\"><p>Hello</p></div>\")
as-hiccup)
```
```klipse
(-> (parse \"<a style=\\\"visibility:hidden\\\">foo</a><div style=\\\"color:green\\\"><p>Hello</p></div>\")
as-hickory)
```
"
[s]
(or (parse-dom-with-domparser s) (parse-dom-with-write s)))
(defn parse-fragment
"Parse an HTML fragment (some group of tags that might be at home somewhere
in the tag hierarchy under <body>) into a list of DOM elements that can
each be passed as input to as-hiccup or as-hickory."
[s]
(as-seq (-> (parse s) .-body .-childNodes)))
================================================
FILE: test/cljc/hickory/test/convert.cljc
================================================
(ns hickory.test.convert
(:require
[clojure.test :refer [deftest is]]
[hickory.convert :refer [hiccup-fragment-to-hickory hiccup-to-hickory
hickory-to-hiccup]]
[hickory.core :refer [as-hiccup as-hickory parse parse-fragment]]))
(deftest hiccup-to-hickory-test
(is (= (as-hickory (parse "<i>Hi.</i>"))
(hiccup-to-hickory (as-hiccup (parse "<i>Hi.</i>")))))
(is (= (as-hickory (parse "<i>Outer<b class=\"foo\">Inner.</b></i>"))
(hiccup-to-hickory (as-hiccup (parse "<i>Outer<b class=\"foo\">Inner.</b></i>")))))
(is (= (as-hickory (parse "<a href='http://localhost/?a=1&b=2'>Hi</a>"))
(hiccup-to-hickory (as-hiccup (parse "<a href='http://localhost/?a=1&b=2'>Hi</a>")))))
(is (= (as-hickory (parse "<script>alert();</script>"))
(hiccup-to-hickory (as-hiccup (parse "<script>alert();</script>"))))))
(deftest hiccup-fragment-to-hickory-test
(is (= (map as-hickory (parse-fragment "<img src=\"a.jpg\">"))
(hiccup-fragment-to-hickory (map as-hiccup (parse-fragment "<img src=\"a.jpg\">")))))
(let [src "<a href=\"/a.txt\"><img src=\"a.jpg\"></a><b>It's an a.</b>"]
(is (= (map as-hickory (parse-fragment src))
(hiccup-fragment-to-hickory (map as-hiccup (parse-fragment src)))))))
(deftest hickory-to-hiccup-test
(is (= (as-hiccup (parse "<i>Hi.</i>"))
(hickory-to-hiccup (as-hickory (parse "<i>Hi.</i>")))))
(is (= (as-hiccup (parse "<i>Outer<b class=\"foo\">Inner.</b></i>"))
(hickory-to-hiccup (as-hickory (parse "<i>Outer<b class=\"foo\">Inner.</b></i>")))))
(is (= (as-hiccup (parse "<a href='http://localhost/?a=1&b=2'>Hi</a>"))
(hickory-to-hiccup (as-hickory (parse "<a href='http://localhost/?a=1&b=2'>Hi</a>")))))
(is (= (as-hiccup (parse "<script>alert();</script>"))
(hickory-to-hiccup (as-hickory (parse "<script>alert();</script>")))))
;; Fragments
(is (= (map as-hiccup (parse-fragment "<img src=\"a.jpg\">"))
(map hickory-to-hiccup (map as-hickory (parse-fragment "<img src=\"a.jpg\">")))))
(let [src "<a href=\"/a.txt\"><img src=\"a.jpg\"></a><b>It's an a.</b>"]
(is (= (map as-hiccup (parse-fragment src))
(map hickory-to-hiccup (map as-hickory (parse-fragment src)))))))
================================================
FILE: test/cljc/hickory/test/core.cljc
================================================
(ns hickory.test.core
(:require [hickory.core :refer [as-hickory as-hiccup parse parse-fragment]]
[clojure.test :refer [deftest is]]))
;; This document tests: doctypes, white space text nodes, attributes,
;; and cdata nodes.
(deftest basic-documents
(is (= ["<!DOCTYPE html>"
[:html {}
[:head {}]
[:body {}
[:a {:href "foo"} "foo"] " "
[:a {:id "so", :href "bar"} "bar"]
[:script {:src "blah.js"} "alert(\"hi\");"]]]]
(as-hiccup (parse "<!DOCTYPE html><a href=\"foo\">foo</a> <a id=\"so\" href=\"bar\">bar</a><script src=\"blah.js\">alert(\"hi\");</script>"))))
(is (= {:type :document,
:content [{:type :document-type,
:attrs {:name "html", :publicid "", :systemid ""
#?@(:clj [:#doctype "html"])}}
{:type :element,
:attrs nil,
:tag :html,
:content [{:type :element,
:attrs nil,
:tag :head,
:content nil}
{:type :element,
:attrs nil,
:tag :body,
:content [{:type :element,
:attrs {:href "foo"},
:tag :a,
:content ["foo"]}
" "
{:type :element,
:attrs {:id "so", :href "bar"},
:tag :a,
:content ["bar"]}
{:type :element,
:attrs {:src "blah.js"},
:tag :script,
:content ["alert(\"hi\");"]}]}]}]}
(as-hickory (parse "<!DOCTYPE html><a href=\"foo\">foo</a> <a id=\"so\" href=\"bar\">bar</a><script src=\"blah.js\">alert(\"hi\");</script>")))))
;; This document tests: doctypes, comments, white space text nodes, attributes,
;; and cdata nodes.
(deftest basic-documents2
(is (= ["<!DOCTYPE html>"
[:html {}
[:head {}]
[:body {}
"<!--comment-->"
[:a {:href "foo"} "foo"] " "
[:a {:id "so", :href "bar"} "bar"]
[:script {:src "blah.js"} "alert(\"hi\");"]]]]
(as-hiccup (parse "<!DOCTYPE html><body><!--comment--><a href=\"foo\">foo</a> <a id=\"so\" href=\"bar\">bar</a><script src=\"blah.js\">alert(\"hi\");</script></body>"))))
(is (= {:type :document,
:content [{:type :document-type,
:attrs {:name "html", :publicid "", :systemid ""
#?@(:clj [:#doctype "html"])}}
{:type :element,
:attrs nil,
:tag :html,
:content [{:type :element,
:attrs nil,
:tag :head,
:content nil}
{:type :element,
:attrs nil,
:tag :body,
:content [{:type :comment
:content ["comment"]}
{:type :element,
:attrs {:href "foo"},
:tag :a,
:content ["foo"]}
" "
{:type :element,
:attrs {:id "so", :href "bar"},
:tag :a,
:content ["bar"]}
{:type :element,
:attrs {:src "blah.js"},
:tag :script,
:content ["alert(\"hi\");"]}]}]}]}
(as-hickory (parse "<!DOCTYPE html><body><!--comment--><a href=\"foo\">foo</a> <a id=\"so\" href=\"bar\">bar</a><script src=\"blah.js\">alert(\"hi\");</script></body>")))))
;; Want to test a document fragment that has multiple nodes with no parent,
;; as well as a text node between nodes.
(deftest basic-document-fragment
(is (= [[:a {:href "foo"} "foo"] " "
[:a {:href "bar"} "bar"]]
(map as-hiccup
(parse-fragment "<a href=\"foo\">foo</a> <a href=\"bar\">bar</a>"))))
(is (= [{:type :element,
:attrs {:href "foo"},
:tag :a,
:content ["foo"]}
" "
{:type :element,
:attrs {:href "bar"},
:tag :a,
:content ["bar"]}]
(map as-hickory
(parse-fragment "<a href=\"foo\">foo</a> <a href=\"bar\">bar</a>")))))
(deftest unencoded-text-nodes
;; Hiccup versions - Note that hiccup representation does not html-escape any
;; strings that aren't attribute values, so the hiccup representation will
;; have the string contents html-escaped.
(is (= [[:html {} [:head {}] [:body {} [:p {} "ABC&\n\nDEF."]]]]
(as-hiccup (parse "<p>ABC&\n\nDEF.</p>"))))
;; <pre> tag preserves whitespace.
(is (= [[:html {} [:head {}] [:body {} [:pre {} "ABC&\n\nDEF."]]]]
(as-hiccup (parse "<pre>ABC&\n\nDEF.</pre>"))))
;; Hickory versions - Note that the representation is different, and Hickory
;; format does not keep HTML escaped in its representation, as it can
;; figure out what to escape at render time.
(is (= "ABC&\n\nDEF."
(get-in (as-hickory (parse "<p>ABC&\n\nDEF.</p>"))
[:content 0 :content 1 :content 0 :content 0])))
;; <pre> tag preserves whitespace.
(is (= "ABC&\n\nDEF."
(get-in (as-hickory (parse "<pre>ABC&\n\nDEF.</pre>"))
[:content 0 :content 1 :content 0 :content 0]))))
;; Issue #50: Tests that the parser does not throw a StackOverflowError when
;; parsing a document with deeply nested HTML tags.
;; I don't have time for this
#_(deftest deeply-nested-tags
(let [jsoup (parse (apply str (repeat 2048 "<font>abc")))
r1 (get-in (vec (as-hiccup jsoup))
(concat [0 3 2] (repeat 2047 3)))
r2 (get-in (as-hickory jsoup)
(apply concat
[:content 0 :content 1 :content 0]
(repeat 2047 [:content 1])))]
(println "R1" r1)
(println "R2" r2)
(is (= [:font {} "abc"]
r1))
(is (= {:type :element
:attrs nil
:tag :font
:content ["abc"]}
r2))))
#?(:cljs
(deftest node-type-test
(is (= 3 (hickory.core/node-type "TEXT")))))
================================================
FILE: test/cljc/hickory/test/hiccup_utils.cljc
================================================
(ns hickory.test.hiccup-utils
(:require [hickory.hiccup-utils :refer [class-names id normalize-form
tag-name tag-well-formed?]]
[clojure.test :refer [deftest is]]))
#?(:clj
(deftest first-idx-test
(let [first-idx #'hickory.hiccup-utils/first-idx]
(is (= -1 (first-idx -1 -1)))
(is (= 2 (first-idx -1 2)))
(is (= 5 (first-idx 5 -1)))
(is (= 3 (first-idx 5 3)))
(is (= 3 (first-idx 3 5))))))
(deftest tag-well-formed?-test
(is (= true (tag-well-formed? :a)))
(is (= true (tag-well-formed? :a#id)))
(is (= true (tag-well-formed? :a#id.class)))
(is (= true (tag-well-formed? :a.class.class2)))
(is (= false (tag-well-formed? "")))
(is (= false (tag-well-formed? ".class")))
(is (= false (tag-well-formed? "a#")))
(is (= false (tag-well-formed? "a#foo.")))
(is (= false (tag-well-formed? "a.")))
(is (= false (tag-well-formed? "a.foo.")))
(is (= false (tag-well-formed? "#id.class")))
(is (= false (tag-well-formed? :a.class#id)))
(is (= false (tag-well-formed? :a#id#id2))))
(deftest tag-name-test
(is (= "a" (tag-name "a")))
(is (= "a" (tag-name 'a)))
(is (= "a" (tag-name :a)))
(is (= "b" (tag-name :b.class)))
(is (= "b" (tag-name :b#id)))
(is (= "b" (tag-name :b.class#id)))
(is (= "b" (tag-name :b#id.class))))
(deftest class-names-test
(is (= [] (class-names :a)))
(is (= [] (class-names :a#foo)))
(is (= ["foo"] (class-names "a.foo")))
(is (= ["bar"] (class-names :a#foo.bar)))
(is (= ["foo" "bar"] (class-names :a.foo.bar))))
(deftest id-test
(is (= nil (id :a)))
(is (= nil (id 'a)))
(is (= "foo" (id :a#foo)))
(is (= "foo" (id :a#foo.bar))))
#?(:clj
(deftest expand-content-seqs-test
(let [expand-content-seqs #'hickory.hiccup-utils/expand-content-seqs]
(is (= [1 2 3] (expand-content-seqs [1 2 3])))
(is (= [1 2 [3]] (expand-content-seqs [1 '(2 [3])])))
;; Example from docstring.
(is (= [1 2 3 2 4 6 [5]]
(expand-content-seqs [1 '(2 3) (for [x [1 2 3]] (* x 2)) [5]]))))))
#?(:clj
(deftest normalize-element-test
(let [normalize-element #'hickory.hiccup-utils/normalize-element]
(is (= [:a {:id nil :class nil} "Hi"] (normalize-element [:a "Hi"])))
(is (= [:a {:id "foo" :class nil} "Hi"]
(normalize-element [:A#foo "Hi"])))
(is (= [:a {:id nil :class "foo"} "Hi"]
(normalize-element [:a.foo "Hi"])))
(is (= [:a {:id "foo" :class "bar"} "Hi" "There"]
(normalize-element [:a#foo.bar "Hi" "There"])))
(is (= [:a {:id "foo" :class "bar"} "Hi"]
(normalize-element [:a.bar {:id "foo"} "Hi"])))
(is (= [:a {:id "foo" :class "bar"}]
(normalize-element [:A#bip {:id "foo" :class "bar"}])))
(is (= [:a {:id "foo" :class "bar"}]
(normalize-element [:a#bip.baz {:id "foo" :class "bar"}])))
(is (= [:a {:id nil :class "foo bar"}]
(normalize-element [:a.foo.bar]))))))
(deftest normalize-form-test
(is (= [:a {:id nil :class nil}] (normalize-form [:A])))
(is (= [:a {:id nil :class nil :href "localhost"}]
(normalize-form [:a {:href "localhost"}])))
(is (= [:a {:id nil :class nil}
[:b {:id nil :class nil} "foo"]
[:i {:id nil :class nil} "bar"]]
(normalize-form [:a [:b "foo"] [:i "bar"]])))
(is (= [:a {:id nil :class nil}
[:b {:id nil :class nil} "foo"]
[:i {:id nil :class nil} "bar"]]
(normalize-form [:a '([:b "foo"] [:i "bar"])])))
(is (= [:a {:id nil :class nil}
[:b {:id nil :class nil} "foo" [:i {:id nil :class nil} "bar"]]]
(normalize-form [:a [:b "foo" [:i "bar"]]]))))
================================================
FILE: test/cljc/hickory/test/render.cljc
================================================
(ns hickory.test.render
(:require
[clojure.test :refer [deftest is]]
[hickory.core :refer [as-hiccup as-hickory parse parse-fragment]]
[hickory.render :refer [hiccup-to-html hickory-to-html]]))
;;
;; Hickory to HTML
;;
(deftest hickory-to-html-test
(is (= "<!DOCTYPE html><html><head></head><body><p><!--hi--><a href=\"foo\" id=\"bar\">hi</a></p></body></html>"
(hickory-to-html (as-hickory (parse "<!DOCTYPE html><P><!--hi--><a href=foo id=\"bar\">hi")))))
;; Make sure void elements don't have closing tags.
(is (= "<html><head></head><body>Hi<br>There</body></html>"
(hickory-to-html (as-hickory (parse "<html><head></head><body>Hi<br>There</body></html>")))))
;; Make sure text is properly escaped.
(is (= "<code><html></code>"
(hickory-to-html (as-hickory (first (parse-fragment "<code><html></code>"))))))
;; Make sure the contents of script/style tags do not get html escaped.
(is (= "<script>Test<!--Test&Test-->Test</script>"
(hickory-to-html (as-hickory
(first (parse-fragment "<body><script>Test<!--Test&Test-->Test</script></body>"))))))
;; Make sure attribute contents are html-escaped.
(is (= "<img fake-attr=\"abc"def\">"
(hickory-to-html (as-hickory (first (parse-fragment "<img fake-attr=\"abc"def\">")))))))
(deftest hickory-doctypes-test
(is (= "<!DOCTYPE html>"
(hickory-to-html {:type :document-type
:attrs {:name "html",
:publicid nil
:systemid nil}})))
(is (= "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
(hickory-to-html {:type :document-type
:attrs {:name "html",
:publicid "-//W3C//DTD HTML 4.01//EN",
:systemid "http://www.w3.org/TR/html4/strict.dtd"}})))
(is (= "<!DOCTYPE html><html><head></head><body></body></html>"
(hickory-to-html (as-hickory (parse "<!DOCTYPE html><html><head></head><body></body></html>")))))
;; Apparently Chrome will parse this doctype as plain html5, so we can't
;; do a roundtrip test in cljs.
#?(:clj (is (= "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"><html><head></head><body></body></html>"
(hickory-to-html (as-hickory (parse "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"><html><head></head><body></body</html>")))))))
(deftest error-handling-test
(let [data {:type :foo :tag :a :attrs {:foo "bar"}}]
(is (thrown-with-msg? #?(:clj Exception :cljs :default) #"^Not a valid node: nil"
(hickory-to-html nil)))
(is (thrown-with-msg? #?(:clj Exception :cljs :default) #?(:clj #"^Not a valid node: \{:type :foo, :tag :a, :attrs \{:foo \"bar\"\}\}"
:cljs #"^Not a valid node: \{:type :foo, :tag :a\, :attrs \{:foo \"bar\"\}}")
(hickory-to-html data)))
(is (= data
(try (hickory-to-html data)
(catch #?(:clj Exception :cljs js/Error) e (:dom (ex-data e))))))))
;;
;; Hiccup to HTML
;;
(deftest hiccup-to-html-test
(is (= "<!DOCTYPE html><html><head></head><body><p><!--hi--><a href=\"foo\" id=\"bar\">hi</a></p></body></html>"
(hiccup-to-html (as-hiccup (parse "<!DOCTYPE html><P><!--hi--><a href=foo id=\"bar\">hi")))))
;; Make sure void elements don't have closing tags.
(is (= "<html><head></head><body>Hi<br>There</body></html>"
(hiccup-to-html (as-hiccup (parse "<html><head></head><body>Hi<br>There</body></html>")))))
;; Make sure text is properly escaped.
(is (= "<code><html></code>"
(hiccup-to-html [(as-hiccup (first (parse-fragment "<code><html></code>")))])))
;; Make sure the contents of script/style tags do not get html escaped.
(is (= "<script>Test<!--Test&Test-->Test</script>"
(hiccup-to-html [(as-hiccup
(first (parse-fragment "<body><script>Test<!--Test&Test-->Test</script></body>")))])))
;; Make sure attribute contents are html-escaped.
(is (= "<img fake-attr=\"abc"def\">"
(hiccup-to-html [(as-hiccup (first (parse-fragment "<img fake-attr=\"abc"def\">")))]))))
(deftest hiccup-doctypes-test
(is (= "<!DOCTYPE html><html><head></head><body></body></html>"
(hiccup-to-html (as-hiccup (parse "<!DOCTYPE html><html><head></head><body></body></html>")))))
#?(:clj (is (= "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"><html><head></head><body></body></html>"
(hiccup-to-html (as-hiccup (parse "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"><html><head></head><body></body</html>")))))))
================================================
FILE: test/cljc/hickory/test/select.cljc
================================================
(ns hickory.test.select
(:require [hickory.core :as hickory]
[hickory.select :as select]
[hickory.utils :as utils]
[hickory.zip :as hzip]
[clojure.zip :as zip]
[clojure.test :refer [deftest is testing]]))
(def html1
"<!DOCTYPE html>
<!-- Comment 1 -->
<html>
<head></head>
<body>
<h1>Heading</h1>
<p>Paragraph</p>
<a href=\"http://example.com\">Link</a>
<div class=\"aclass bclass cool\">
<span disabled anotherattr=\"\" thirdthing=\"44\" id=\"attrspan\" Capitalized=\"UPPERCASED\">
<div class=\"subdiv cool\" id=\"deepestdiv\">Div</div>
</span>
<!-- Comment 2 -->
<span id=\"anid\" class=\"line-feed-ahead cool\">Span</span>
</div>
</body>
</html>")
(def html2
"<!DOCTYPE html>
<html>
<head></head>
<body>
<p>Paragraph 1</p>
<p>Paragraph 2</p>
<p>Paragraph 3</p>
<p>Paragraph 4</p>
<p>Paragraph 5</p>
<p>Paragraph 6</p>
<p>Paragraph 7</p>
<p>Paragraph 8</p>
</body>
</html>")
(deftest select-next-loc-test
(testing "The select-next-loc function."
(let [htree (hickory/as-hickory (hickory/parse html1))
find-comment-fn (fn [zip-loc]
(= (:type (zip/node zip-loc))
:comment))]
(let [selection (select/select-next-loc find-comment-fn
(hzip/hickory-zip htree))]
(is (and (= :comment
(-> selection zip/node :type))
(re-find #"Comment 1" (-> (zip/node selection)
:content first))))
(let [second-selection (select/select-next-loc find-comment-fn
(zip/next selection))]
(is (and (= :comment
(-> second-selection zip/node :type))
(re-find #"Comment 2" (-> (zip/node second-selection)
:content first))))
(is (nil? (select/select-next-loc find-comment-fn
(zip/next second-selection)))))))))
(deftest select-test
(testing "The select function."
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (fn [zip-loc]
(= (:type (zip/node zip-loc))
:document-type))
htree)]
(is (and (= 1 (count selection))
(= :document-type
(-> selection first :type)))))
(let [selection (select/select (fn [zip-loc]
(= (:type (zip/node zip-loc))
:comment))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :comment (:type %))
selection))))))))
;;
;; Selector tests
;;
(deftest node-type-test
(testing "node-type selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/node-type :document-type)
htree)]
(is (and (= 1 (count selection))
(= :document-type (-> selection first :type)))))
(let [selection (select/select (select/node-type :comment)
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :comment (:type %))
selection))))))))
(deftest tag-test
(testing "tag selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/tag "h1")
htree)]
(is (and (= 1 (count selection))
(= :h1 (-> selection first :tag)))))
;; Case-insensitivity test
(let [selection (select/select (select/tag "H1")
htree)]
(is (and (= 1 (count selection))
(= :h1 (-> selection first :tag)))))
;; Non-string argument test
(let [selection (select/select (select/tag :h1)
htree)]
(is (and (= 1 (count selection))
(= :h1 (-> selection first :tag))))))))
(deftest attr-test
(testing "attr selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/attr :disabled)
htree)]
(is (and (= 1 (count selection))
(= "attrspan" (-> selection first :attrs :id)))))
(let [selection (select/select (select/attr "anotherattr")
htree)]
(is (and (= 1 (count selection))
(= "attrspan" (-> selection first :attrs :id)))))
(let [selection (select/select (select/attr :thirdthing #(= "44" %))
htree)]
(is (and (= 1 (count selection))
(= "attrspan" (-> selection first :attrs :id)))))
;; Case-insensitivity of names and non-equality predicate test
(let [selection (select/select (select/attr "CAPITALIZED"
#(utils/starts-with % "UPPER"))
htree)]
(is (and (= 1 (count selection))
(= "attrspan" (-> selection first :attrs :id)))))
;; Graceful failure to find anything
(let [selection (select/select (select/attr "notpresent"
#(utils/starts-with % "never"))
htree)]
(is (= 0 (count selection)))))))
(deftest id-test
(testing "id selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/id "deepestdiv")
htree)]
(is (and (= 1 (count selection))
(re-find #"deepestdiv"
(-> selection first :attrs :id)))))
(let [selection (select/select (select/id "anid")
htree)]
(is (and (= 1 (count selection))
(re-find #"anid"
(-> selection first :attrs :id)))))
;; Case-insensitivity test
(let [selection (select/select (select/id "ANID")
htree)]
(is (and (= 1 (count selection))
(re-find #"anid"
(-> selection first :attrs :id)))))
;; Non-string argument test
(let [selection (select/select (select/id :anid)
htree)]
(is (and (= 1 (count selection))
(re-find #"anid"
(-> selection first :attrs :id))))))))
(deftest class-test
(testing "class selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/class "aclass")
htree)]
(is (and (= 1 (count selection))
(re-find #"aclass"
(-> selection first :attrs :class)))))
(let [selection (select/select (select/class "cool")
htree)]
(is (and (= 3 (count selection))
(every? #(not (nil? %))
(map #(re-find #"cool"
(-> % :attrs :class))
selection)))))
;; Case-insensitivity test
(let [selection (select/select (select/class "Aclass")
htree)]
(is (and (= 1 (count selection))
(re-find #"aclass"
(-> selection first :attrs :class)))))
;; Non-string argument test
(let [selection (select/select (select/class :aclass)
htree)]
(is (and (= 1 (count selection))
(re-find #"aclass"
(-> selection first :attrs :class)))))
;; class followed by line feed
(let [selection (select/select (select/class :line-feed-ahead)
htree)]
(is (and (= 1 (count selection))
(re-find #"line-feed-ahead"
(-> selection first :attrs :class))))))))
(deftest any-test
(testing "any selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select select/any
htree)]
(is (= 10 (count selection)))))))
(deftest element-child-test
(testing "element-child selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select select/element-child htree)]
(is (= 9 (count selection)))))))
(deftest root-test
(testing "root selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select select/root
htree)]
(is (= :html (-> selection first :tag)))))))
(deftest find-in-text-test
(testing "find-in-text selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/find-in-text #"Heading") htree)]
(is (and (= 1 (count selection))
(= :h1 (-> selection first :tag)))))
(let [selection (select/select (select/find-in-text #"Div") htree)]
(is (and (= 1 (count selection))
(= :div (-> selection first :tag)))))
(let [selection-locs (select/select-locs
(select/child (select/tag :body)
(select/find-in-text #"Paragraph"))
htree)
selection (mapv zip/node selection-locs)]
(is (and (= 1 (count selection))
(= :p (-> selection first :tag))
(= :body (-> selection-locs first zip/up zip/node :tag))))))
(let [htree (hickory/as-hickory (hickory/parse html2))]
(let [selection (select/select (select/find-in-text #"Paragraph") htree)]
(is (and (= 8 (count selection))
(every? #(= :p %) (map :tag selection))))))))
(deftest n-moves-until-test
(testing "n-moves-until selector"
;; This function is actually pretty well exercised by nth-child, etc.
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/and (select/tag :div)
(select/n-moves-until 0 6
zip/up
nil?))
htree)]
(is (= "deepestdiv" (-> selection first :attrs :id)))))))
(deftest nth-of-type-test
(testing "nth-of-type selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/nth-of-type 1 :body)
htree)]
(is (and (= 1 (count selection))
(= :body (:tag (first selection)))))))))
(deftest nth-last-of-type-test
(testing "nth-last-of-type selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/nth-last-of-type 1 :span)
htree)]
(is (and (= 1 (count selection))
(= "anid" (-> selection first :attrs :id))))))))
(deftest nth-child-test
(testing "nth-child selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/and (select/tag :div)
(select/nth-child 0 1))
htree)]
(is (and (= 1 (count selection))
(= "deepestdiv" (-> selection first :attrs :id)))))
(let [selection (select/select (select/and (select/tag :div)
(select/nth-child 1 1))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %))
selection)))))
(let [selection (select/select (select/and (select/tag :div)
(select/nth-child :odd))
htree)]
(is (and (= 1 (count selection))
(= "deepestdiv" (-> selection first :attrs :id)))))
(let [selection (select/select (select/and (select/node-type :element)
(select/nth-child :even))
htree)]
(is (and (= 4 (count selection))
(= :element (-> selection first :type))))))
(let [htree (hickory/as-hickory (hickory/parse html2))]
(let [selection (select/select (select/and (select/node-type :element)
(select/nth-child :even))
htree)]
(is (and (= 5 (count selection))
(every? true? (map #(contains? #{:body :p} (:tag %))
selection)))))
(let [selection (select/select (select/nth-child 3 0)
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :p (:tag %))
selection)))))
(let [selection (select/select (select/child (select/tag :body)
(select/nth-child 3 1))
htree)]
(is (and (= 3 (count selection))
(every? true? (map #(= :p (:tag %))
selection))))))))
(deftest nth-last-child-test
(testing "nth-last-child selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/and (select/tag :div)
(select/nth-last-child 0 1))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %)) selection)))))
(let [selection (select/select (select/and (select/tag :div)
(select/nth-last-child 1 1))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %))
selection)))))
(let [selection (select/select (select/and (select/tag :div)
(select/nth-last-child :odd))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %)) selection)))))
(let [selection (select/select (select/and (select/node-type :element)
(select/nth-last-child :even))
htree)]
(is (and (= 4 (count selection))
(= :element (-> selection first :type))))))
(let [htree (hickory/as-hickory (hickory/parse html2))]
(let [selection (select/select (select/and (select/node-type :element)
(select/nth-last-child :even))
htree)]
(is (and (= 5 (count selection))
(every? true? (map #(contains? #{:head :p} (:tag %))
selection)))))
(let [selection (select/select (select/nth-last-child 3 0)
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :p (:tag %))
selection)))))
(let [selection (select/select (select/child (select/tag :body)
(select/nth-last-child 3 1))
htree)]
(is (and (= 3 (count selection))
(every? true? (map #(= :p (:tag %))
selection))))))))
(deftest first-child-test
(testing "first-child selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/child (select/tag :div)
select/first-child)
htree)]
(is (and (= 1 (count selection))
(= "attrspan" (-> selection first :attrs :id))))))))
(deftest last-child-test
(testing "last-child selector"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/child (select/tag :div)
select/last-child)
htree)]
(is (and (= 1 (count selection))
(= "anid" (-> selection first :attrs :id))))))))
;;
;; Selector Combinators
;;
(deftest and-test
(testing "and selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/and (select/tag :div))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %)) selection)))))
(let [selection (select/select (select/and (select/tag :div)
(select/class "bclass"))
htree)]
(is (and (= 1 (count selection))
(re-find #"bclass"
(-> selection first :attrs :class)))))
(let [selection (select/select (select/and (select/class "cool")
(select/tag :span))
htree)]
(is (and (= 1 (count selection))
(= "anid" (-> selection first :attrs :id)))))
(let [selection (select/select (select/and (select/class "cool")
(select/tag :span)
(select/id :attrspan))
htree)]
(is (= [] selection))))))
(deftest or-test
(testing "or selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/or (select/tag :a)
(select/class "notpresent")
(select/id :nothere))
htree)]
(is (and (= 1 (count selection))
(every? true? (map #(= :a (:tag %)) selection)))))
(let [selection (select/select (select/or (select/tag :div))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %)) selection)))))
(let [selection (select/select (select/or (select/id "deepestdiv")
(select/class "bclass"))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (:tag %)) selection))))))))
(deftest not-test
(testing "not selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/and (select/node-type :element)
(select/not (select/class :cool)))
htree)]
(is (and (= 7 (count selection))
(every? true? (map #(and (= :element (-> % :type))
(or (not (-> % :attrs :class))
(not (re-find #"cool"
(-> % :attrs :class)))))
selection)))))
(let [selection (select/select (select/el-not (select/class :cool))
htree)]
(is (and (= 7 (count selection))
(every? true? (map #(and (= :element (-> % :type))
(or (not (-> % :attrs :class))
(not (re-find #"cool"
(-> % :attrs :class)))))
selection))))))))
(deftest ordered-adjacent-test
(testing "ordered-adjacent selector combinator"
;; This is pretty well tested by the tests for child and others.
(let [htree (hickory/as-hickory (hickory/parse html1))]
;; Select body tag that is just after a head tag.
(let [selection (select/select
(select/ordered-adjacent
#(select/left-of-node-type % :element)
(select/tag :body)
(select/tag :head))
htree)]
(is (and (= 1 (count selection))
(= :body (-> selection first :tag))))))))
(deftest child-test
(testing "child selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/child (select/el-not select/any))
htree)]
(is (= [] selection)))
(let [selection (select/select (select/child (select/tag :html)
(select/tag :div)
(select/tag :span))
htree)]
(is (= [] selection)))
(let [selection (select/select (select/child (select/tag :body)
(select/tag :div)
(select/tag :span))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :span (:tag %)) selection)))))
(let [selection (select/select (select/child (select/tag :div)
select/any)
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(or (= :span (-> % :tag))
(= :div (-> % :tag)))
selection))))))
;; Check examples from the doc string.
(let [htree (-> "<div><span class=\"foo\"><input disabled></input></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/child (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (= :input (-> selection first :tag)))))
(let [htree (-> "<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/child (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (= [] selection))))))
(deftest has-child-test
(testing "has-child selector combinator"
(let [docs ["<div id=\"outermost\"><div><span id=\"innermost\"></span></div></div>"
"<div id=\"outermost\"><div><span id=\"innermost\"></span></div><span id=\"sib\"></span></div>"
"<div id=\"outermost\"><span id=\"sib\"></span><div><span id=\"innermost\"></span></div></div>"]]
(doseq [doc docs]
(let [htree (-> doc
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/has-child
(select/id :innermost))
htree)]
(is (and (= 1 (count selection))
(every? true? (map #(= :div (-> % :tag)) selection)))))
;; Check that a descendant selector can peer up past the
;; node having its descendants examined.
(let [selection (select/select (select/has-child
(select/descendant (select/id :outermost)
(select/id :innermost)))
htree)]
(is (and (= 1 (count selection))
(every? true? (map #(= :div (-> % :tag)) selection)))))
(let [selection (select/select (select/has-child (select/tag :a))
htree)]
(is (= [] selection))))))))
(deftest parent-test
(testing "parent selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/parent (select/el-not select/any))
htree)]
(is (= [] selection)))
(let [selection (select/select (select/parent (select/tag :html)
(select/tag :div)
(select/tag :span))
htree)]
(is (= [] selection)))
(let [selection (select/select (select/parent (select/tag :body)
(select/tag :div)
(select/tag :span))
htree)]
(is (and (= 1 (count selection))
(every? true? (map #(= :body (:tag %)) selection)))))
(let [selection (select/select (select/parent (select/tag :div)
select/any)
htree)]
(is (and (= 1 (count selection))
(every? true? (map #(= :div (-> % :tag))
selection)))))
;; Find any element that is a parent of another element
(let [selection (select/select (select/parent select/any select/any)
htree)]
(is (and (= 4 (count selection))
(every? true? (mapv #(or (= :html (-> % :tag))
(= :body (-> % :tag))
(= :div (-> % :tag))
(= :span (-> % :tag)))
selection))))))
;; Check examples from the doc string.
(let [htree (-> "<div><span class=\"foo\"><input disabled></input></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/parent (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (= :div (-> selection first :tag)))))
(let [htree (-> "<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/parent (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (= [] selection))))))
(deftest follow-adjacent-test
(testing "follow-adjacent selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/follow-adjacent (select/tag :head)
(select/tag :body))
htree)]
(is (and (= 1 (count selection))
(= :body (-> selection first :tag))))))
;; Check the examples from the doc string.
(let [htree (-> "<div>...</div><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/follow-adjacent (select/tag :div)
(select/class "foo"))
htree)]
(is (= :span (-> selection first :tag)))))
(let [htree (-> "<div>...</div><b>...</b><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/follow-adjacent (select/tag :div)
(select/class "foo"))
htree)]
(is (= [] selection))))))
(deftest precede-adjacent-test
(testing "precede-adjacent selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/precede-adjacent (select/tag :head)
(select/tag :body))
htree)]
(is (and (= 1 (count selection))
(= :head (-> selection first :tag))))))
;; Check the examples from the doc string.
(let [htree (-> "<div>...</div><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/precede-adjacent (select/tag :div)
(select/class "foo"))
htree)]
(is (and (= 1 (count selection))
(= :div (-> selection first :tag))))))
(let [htree (-> "<div>...</div><b>...</b><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/precede-adjacent (select/tag :div)
(select/class "foo"))
htree)]
(is (= [] selection))))))
(deftest ordered-test
;; Just a basic tire kick here, it gets exercised by descendant, follow, and
;; precede.
(testing "ordered selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select
(select/ordered
#(select/left-of-node-type % :element)
(select/tag :body)
(select/tag :head))
htree)]
(is (and (= 1 (count selection))
(= :body (-> selection first :tag))))))))
(deftest descendant-test
(testing "descendant selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/descendant (select/tag :h1))
htree)]
(is (and (= 1 (count selection))
(= :h1 (-> selection first :tag)))))
(let [selection (select/select (select/descendant (select/class "cool")
(select/tag :div))
htree)]
(is (= 1 (count selection))
(= "deepestdiv" (-> selection first :attrs :id))))
(let [selection (select/select (select/descendant (select/tag :div)
select/any)
htree)]
(is (= 3 (count selection)))))
;; Check examples from doc string.
(let [htree (-> "<div><span class=\"foo\"><input disabled></input></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/descendant (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (and (= 1 (count selection))
(= :input (-> selection first :tag))))))
(let [htree (-> "<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/descendant (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (and (= 1 (count selection))
(= :input (-> selection first :tag))))))))
(deftest has-descendant-test
(testing "has-descendant selector combinator"
(let [docs ["<div id=\"outermost\"><div><span id=\"innermost\"></span></div></div>"
"<div id=\"outermost\"><div><span id=\"innermost\"></span></div><span id=\"sib\"></span></div>"
"<div id=\"outermost\"><span id=\"sib\"></span><div><span id=\"innermost\"></span></div></div>"]]
(doseq [doc docs]
(let [htree (-> doc
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/and (select/tag :div)
(select/has-descendant
(select/id :innermost)))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (-> % :tag)) selection)))))
;; Check that a descendant selector can peer up past the
;; node having its descendants examined.
(let [selection (select/select (select/and (select/tag :div)
(select/has-descendant
(select/descendant (select/id :outermost)
(select/tag :span))))
htree)]
(is (and (= 2 (count selection))
(every? true? (map #(= :div (-> % :tag)) selection)))))
(let [selection (select/select (select/has-descendant (select/tag :a))
htree)]
(is (= [] selection))))))))
(deftest ancestor-test
(testing "ancestor selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/ancestor (select/tag :h1))
htree)]
(is (and (= 1 (count selection))
(= :h1 (-> selection first :tag)))))
(let [selection (select/select (select/ancestor (select/class "cool")
(select/tag :div))
htree)]
(is (= 1 (count selection))
(= "deepestdiv" (-> selection first :attrs :id))))
(let [selection (select/select (select/ancestor (select/tag :div)
select/any)
htree)]
(is (= 1 (count selection))))
(let [selection (select/select (select/ancestor (select/tag :span))
htree)]
(is (= 2 (count selection))))
;; Find any element that is a parent of another element
(let [selection (select/select (select/parent select/any select/any)
htree)]
(is (and (= 4 (count selection))
(every? true? (mapv #(or (= :html (-> % :tag))
(= :body (-> % :tag))
(= :div (-> % :tag))
(= :span (-> % :tag)))
selection))))))
;; Check examples from doc string.
(let [htree (-> "<div><span class=\"foo\"><input disabled></input></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/ancestor (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (and (= 1 (count selection))
(= :div (-> selection first :tag))))))
(let [htree (-> "<div><span class=\"foo\"><b><input disabled></input></b></span></div>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/ancestor (select/tag :div)
(select/class :foo)
(select/attr :disabled))
htree)]
(is (and (= 1 (count selection))
(= :div (-> selection first :tag))))))))
(deftest follow-test
(testing "follow selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/follow (select/tag :head)
(select/tag :body))
htree)]
(is (and (= 1 (count selection))
(= :body (-> selection first :tag))))))
;; Check the examples from the doc string.
(let [htree (-> "<div>...</div><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/follow (select/tag :div)
(select/class "foo"))
htree)]
(is (= :span (-> selection first :tag)))))
(let [htree (-> "<div>...</div><b>...</b><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/follow (select/tag :div)
(select/class "foo"))
htree)]
(is (= :span (-> selection first :tag)))))))
(deftest precede-test
(testing "precede selector combinator"
(let [htree (hickory/as-hickory (hickory/parse html1))]
(let [selection (select/select (select/precede (select/tag :head)
(select/tag :body))
htree)]
(is (and (= 1 (count selection))
(= :head (-> selection first :tag))))))
;; Check the examples from the doc string.
(let [htree (-> "<div>...</div><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/precede (select/tag :div)
(select/class "foo"))
htree)]
(is (and (= 1 (count selection))
(= :div (-> selection first :tag))))))
(let [htree (-> "<div>...</div><b>...</b><span class=\"foo\">...</span>"
hickory/parse hickory/as-hickory)]
(let [selection (select/select (select/precede (select/tag :div)
(select/class "foo"))
htree)]
(is (= :div (-> selection first :tag)))))))
(deftest graceful-boundaries-test
;; Testing some problematic expressions to make sure they gracefully
;; return empty results.
(let [hick (-> (hickory/parse-fragment "<a><img href=\"\"/></a>")
first
hickory/as-hickory)]
(is (= []
(select/select (select/child
(select/follow-adjacent (select/tag :a)
(select/tag :img)))
hick)))
(is (= []
(select/select (select/child
(select/follow-adjacent (select/tag :nonexistent)
(select/tag :img)))
hick)))
(is (= []
(select/select (select/child
(select/follow-adjacent (select/tag :a)
(select/tag :nonexistent)))
hick)))
(is (= [{:type :element, :attrs {:href ""}, :tag :img, :content nil}]
(select/select (select/child select/first-child) hick)))
(is (= [{:type :element, :attrs {:href ""}, :tag :img, :content nil}]
(select/select (select/child select/last-child) hick)))))
================================================
FILE: test/cljc/hickory/test/zip.cljc
================================================
(ns hickory.test.zip
(:require [clojure.zip :as zip]
[hickory.core :refer [as-hiccup as-hickory parse]]
[hickory.zip :refer [hickory-zip hiccup-zip]]
[clojure.test :refer [deftest is]]))
(deftest hickory-zipper
(is (= {:type :document,
:content [{:type :element,
:attrs nil,
:tag :html,
:content [{:type :element,
:attrs nil,
:tag :head,
:content nil}
{:type :element,
:attrs nil,
:tag :body,
:content [{:type :element,
:attrs nil,
:tag :a,
:content nil}]}]}]}
(zip/node (hickory-zip (as-hickory (parse "<a>"))))))
(is (= {:type :element,
:attrs nil,
:tag :html,
:content [{:type :element,
:attrs nil,
:tag :head,
:content nil}
{:type :element,
:attrs nil,
:tag :body,
:content [{:type :element,
:attrs nil,
:tag :a,
:content nil}]}]}
(-> (hickory-zip (as-hickory (parse "<a>")))
zip/next zip/node)))
(is (= {:type :element, :attrs nil, :tag :head, :content nil}
(-> (hickory-zip (as-hickory (parse "<a>")))
zip/next zip/next zip/node)))
(is (= {:type :element,
:attrs nil,
:tag :body,
:content [{:type :element,
:attrs nil,
:tag :a,
:content nil}]}
(-> (hickory-zip (as-hickory (parse "<a>")))
zip/next zip/next zip/next zip/node)))
(is (= {:type :element,
:attrs nil,
:tag :html,
:content [{:type :element,
:attrs nil,
:tag :head,
:content nil}
{:type :element,
:attrs nil,
:tag :body,
:content [{:type :element,
:attrs nil,
:tag :a,
:content nil}]}]}
(-> (hickory-zip (as-hickory (parse "<a>")))
zip/next zip/next zip/next zip/up zip/node))))
(deftest hiccup-zipper
(is (= '([:html {} [:head {}] [:body {} [:a {}]]])
(zip/node (hiccup-zip (as-hiccup (parse "<a>"))))))
(is (= [:html {} [:head {}] [:body {} [:a {}]]]
(-> (hiccup-zip (as-hiccup (parse "<a>")))
zip/next zip/node)))
(is (= [:head {}]
(-> (hiccup-zip (as-hiccup (parse "<a>")))
zip/next zip/next zip/node)))
(is (= [:body {} [:a {}]]
(-> (hiccup-zip (as-hiccup (parse "<a>")))
zip/next zip/next zip/next zip/node)))
(is (= [:html {} [:head {}] [:body {} [:a {}]]]
(-> (hiccup-zip (as-hiccup (parse "<a>")))
zip/next zip/next zip/next zip/up zip/node))))
================================================
FILE: test/cljs/hickory/advanced.edn
================================================
{:optimizations :advanced}
================================================
FILE: test/cljs/hickory/doo_runner.cljs
================================================
(ns hickory.doo-runner
(:require [doo.runner :refer-macros [doo-tests]]
[hickory.test.convert]
[hickory.test.core]
[hickory.test.hiccup-utils]
[hickory.test.render]
[hickory.test.select]
[hickory.test.zip]))
(doo-tests 'hickory.test.core
'hickory.test.convert
'hickory.test.hiccup-utils
'hickory.test.render
'hickory.test.select
'hickory.test.zip)
gitextract_ux8kf796/
├── .circleci/
│ └── config.yml
├── .clj-kondo/
│ ├── babashka/
│ │ └── fs/
│ │ └── config.edn
│ └── rewrite-clj/
│ └── rewrite-clj/
│ └── config.edn
├── .github/
│ ├── CODEOWNERS
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── API.md
├── LICENSE
├── ORIGINATOR
├── README.md
├── bb.edn
├── deps.edn
├── package.json
├── project.clj
├── src/
│ ├── clj/
│ │ └── hickory/
│ │ └── core.clj
│ ├── cljc/
│ │ └── hickory/
│ │ ├── convert.cljc
│ │ ├── hiccup_utils.cljc
│ │ ├── render.cljc
│ │ ├── select.cljc
│ │ ├── utils.cljc
│ │ └── zip.cljc
│ └── cljs/
│ └── hickory/
│ └── core.cljs
└── test/
├── cljc/
│ └── hickory/
│ └── test/
│ ├── convert.cljc
│ ├── core.cljc
│ ├── hiccup_utils.cljc
│ ├── render.cljc
│ ├── select.cljc
│ └── zip.cljc
└── cljs/
└── hickory/
├── advanced.edn
└── doo_runner.cljs
Condensed preview — 30 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (184K chars).
[
{
"path": ".circleci/config.yml",
"chars": 3712,
"preview": "# Clojure CircleCI 2.0 configuration file\n#\n# Check https://circleci.com/docs/2.0/language-clojure/ for more details\n#\nv"
},
{
"path": ".clj-kondo/babashka/fs/config.edn",
"chars": 56,
"preview": "{:lint-as {babashka.fs/with-temp-dir clojure.core/let}}\n"
},
{
"path": ".clj-kondo/rewrite-clj/rewrite-clj/config.edn",
"chars": 186,
"preview": "{:lint-as\n {rewrite-clj.zip/subedit-> clojure.core/->\n rewrite-clj.zip/subedit->> clojure.core/->>\n rewrite-clj.zip/ed"
},
{
"path": ".github/CODEOWNERS",
"chars": 11,
"preview": "* @port19x\n"
},
{
"path": ".github/workflows/ci.yml",
"chars": 1345,
"preview": "name: ci\n\non: [push, pull_request]\n\njobs:\n\n clojure:\n\n strategy:\n matrix:\n os: [ubuntu-latest]\n\n runs"
},
{
"path": ".gitignore",
"chars": 253,
"preview": "/target\n/lib\n/classes\n/checkouts\n/codox-out\npom.xml\n*.jar\n*.class\n.lein-deps-sum\n.lein-failures\n.lein-plugins\n.lein-repl"
},
{
"path": "API.md",
"chars": 7412,
"preview": "# Table of contents\n- [`hickory.core`](#hickory.core) \n - [`Attribute`](#hickory.core/attribute)\n - [`Comment`]"
},
{
"path": "LICENSE",
"chars": 11287,
"preview": "Source code distributed under the Eclipse Public License - v 1.0:\n\nTHE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS "
},
{
"path": "ORIGINATOR",
"chars": 15,
"preview": "@davidsantiago\n"
},
{
"path": "README.md",
"chars": 22645,
"preview": "[](https://clojars.org/org.clj-commons/h"
},
{
"path": "bb.edn",
"chars": 1074,
"preview": "{:deps {io.github.borkdude/lein2deps {:git/url \"https://github.com/borkdude/lein2deps\"\n "
},
{
"path": "deps.edn",
"chars": 591,
"preview": "{:paths [\"src/clj\" \"src/cljc\" \"src/cljs\"]\n :deps\n {org.clojure/clojure {:mvn/version \"1.11.1\"}\n org.jsoup/jsoup {:mvn/v"
},
{
"path": "package.json",
"chars": 152,
"preview": "{\n \"devDependencies\": {\n \"karma\": \"^6.4.1\",\n \"karma-chrome-launcher\": \"^3.1.1\",\n \"karma-cli\": \"^2.0.0\",\n \"k"
},
{
"path": "project.clj",
"chars": 901,
"preview": "(defproject org.clj-commons/hickory (or (System/getenv \"PROJECT_VERSION\") \"0.7.1\")\n :description \"HTML as Data\"\n :url "
},
{
"path": "src/clj/hickory/core.clj",
"chars": 7241,
"preview": "(ns hickory.core\n (:require [hickory.utils :as utils]\n [hickory.zip :as hzip]\n [clojure.zip :as z"
},
{
"path": "src/cljc/hickory/convert.cljc",
"chars": 2179,
"preview": "(ns hickory.convert\n \"Functions to convert from one representation to another.\"\n (:require [hickory.render :as render]"
},
{
"path": "src/cljc/hickory/hiccup_utils.cljc",
"chars": 8050,
"preview": "(ns hickory.hiccup-utils\n \"Utilities for working with hiccup forms.\"\n (:require [clojure.string :as str]))\n\n(defn- fir"
},
{
"path": "src/cljc/hickory/render.cljc",
"chars": 5306,
"preview": "(ns hickory.render\n (:require [hickory.hiccup-utils :as hu]\n [hickory.utils :as utils]\n [clojure."
},
{
"path": "src/cljc/hickory/select.cljc",
"chars": 29241,
"preview": "(ns hickory.select\n \"Functions to query hickory-format HTML data.\n\n See clojure.zip for more information on zippers, "
},
{
"path": "src/cljc/hickory/utils.cljc",
"chars": 2112,
"preview": "(ns hickory.utils\n \"Miscellaneous utilities used internally.\"\n (:require [clojure.string :as string]\n #?(:c"
},
{
"path": "src/cljc/hickory/zip.cljc",
"chars": 2032,
"preview": "(ns hickory.zip\n (:require [clojure.zip :as zip]))\n\n;;\n;; Hickory\n;;\n\n(defn hickory-zip\n \"Returns a zipper for html do"
},
{
"path": "src/cljs/hickory/core.cljs",
"chars": 7601,
"preview": "(ns hickory.core\n (:require [hickory.utils :as utils]\n [clojure.zip :as zip]\n [goog.string :as gs"
},
{
"path": "test/cljc/hickory/test/convert.cljc",
"chars": 2289,
"preview": "(ns hickory.test.convert\n (:require\n [clojure.test :refer [deftest is]]\n [hickory.convert :refer [hiccup-fragment-t"
},
{
"path": "test/cljc/hickory/test/core.cljc",
"chars": 7112,
"preview": "(ns hickory.test.core\n (:require [hickory.core :refer [as-hickory as-hiccup parse parse-fragment]]\n [clojure"
},
{
"path": "test/cljc/hickory/test/hiccup_utils.cljc",
"chars": 3751,
"preview": "(ns hickory.test.hiccup-utils\n (:require [hickory.hiccup-utils :refer [class-names id normalize-form\n "
},
{
"path": "test/cljc/hickory/test/render.cljc",
"chars": 4999,
"preview": "(ns hickory.test.render\n (:require\n [clojure.test :refer [deftest is]]\n [hickory.core :refer [as-hiccup as-hickory "
},
{
"path": "test/cljc/hickory/test/select.cljc",
"chars": 41007,
"preview": "(ns hickory.test.select\n (:require [hickory.core :as hickory]\n [hickory.select :as select]\n [hick"
},
{
"path": "test/cljc/hickory/test/zip.cljc",
"chars": 3333,
"preview": "(ns hickory.test.zip\n (:require [clojure.zip :as zip]\n [hickory.core :refer [as-hiccup as-hickory parse]]\n "
},
{
"path": "test/cljs/hickory/advanced.edn",
"chars": 27,
"preview": "{:optimizations :advanced}\n"
},
{
"path": "test/cljs/hickory/doo_runner.cljs",
"chars": 479,
"preview": "(ns hickory.doo-runner\n (:require [doo.runner :refer-macros [doo-tests]]\n [hickory.test.convert]\n "
}
]
About this extraction
This page contains the full source code of the clj-commons/hickory GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 30 files (172.3 KB), approximately 44.7k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.