Repository: dwd/rapidxml Branch: master Commit: a304c197c706 Files: 27 Total size: 360.9 KB Directory structure: gitextract_2ljvjjz2/ ├── .github/ │ └── workflows/ │ └── gtest.yml ├── .gitignore ├── README.md ├── conanfile.py ├── include/ │ ├── flxml/ │ │ ├── generator.h │ │ ├── iterators.h │ │ ├── predicates.h │ │ ├── print.h │ │ ├── tables.h │ │ ├── utils.h │ │ └── wrappers.h │ ├── flxml.h │ ├── rapidxml.hpp │ └── rapidxml_print.hpp ├── license.txt ├── manual.html └── test/ ├── CMakeLists.txt ├── conanfile.py ├── sonar-project.properties └── src/ ├── iterators.cpp ├── low-level-parse.cpp ├── main.cc ├── manipulations.cpp ├── parse-simple.cpp ├── perf.cpp ├── round-trips.cpp └── xpath.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/gtest.yml ================================================ name: gtest on: - push - pull_request - release jobs: gtest: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 - name: Figure out version id: tag run: | TAG=$(git describe --tags --abbrev=0) COMMITS_SINCE_TAG=$(git rev-list ${TAG}..HEAD --count) if [ "${COMMITS_SINCE_TAG}" -eq 0 ]; then echo "VERSION=${TAG}" >> $GITHUB_ENV else echo "VERSION="$(git describe --tags --abbrev=8) >> $GITHUB_ENV fi - name: Cache Conan2 dependencies uses: actions/cache@v3 with: path: ~/.conan2 key: ${{ runner.os }}-conan2-${{ hashFiles('**/conanfile.py') }} restore-keys: | ${{ runner.os }}-conan2- - name: Set up Python 3.8 for gcovr uses: actions/setup-python@v4 - name: SonarQube install uses: SonarSource/sonarcloud-github-c-cpp@v3 - name: Install Conan run: pip install conan - name: Configure Conan Profile run: | conan profile detect -e conan remote add conan-nexus https://nexus.cridland.io/repository/dwd-conan --force conan remote login conan-nexus ci --password ${{ secrets.NEXUS_PASSWORD }} - name: Conan Deps run: 
conan install . --output-folder=gh-build -s build_type=Debug -s compiler.cppstd=gnu23 -b missing --version=${{ env.VERSION }} - name: Create package run: conan create . --version=${{ env.VERSION }} - name: Conan deps for tests run: cd test && conan install . --output-folder=. -s build_type=Debug -s compiler.cppstd=gnu23 -b missing --version=${{ env.VERSION }} - name: CMake tests run: cd test && cmake -B gh-build -DCMAKE_BUILD_TYPE=Debug - name: Build Wrapper run: cd test && build-wrapper-linux-x86-64 --out-dir sonar-out cmake --build gh-build - name: Sonar Scanner run: cd test && sonar-scanner --define sonar.cfamily.compile-commands=sonar-out/compile_commands.json env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - name: Run Tests run: cd test/gh-build && ./rapidxml-test - name: Upload run: conan upload -r conan-nexus --confirm 'flxml/*' ================================================ FILE: .gitignore ================================================ /cmake-build-debug/ /gtest-build/ ================================================ FILE: README.md ================================================ # FLXML ## Or -- RapidXML, Dave's Version Hey! This is a fork of RapidXML, an ancient C++ library for parsing XML quickly and flexibly. To distinguish, this version is called "FLXML", for "Fast/Light XML". Hey, it's a name. There's a lot of forks of this around, and I (Dave Cridland) didn't write the vast majority of this library - instead, it was written by someone called Marcin Kalicinski, and his copyright is dated 2009. ## Version 2, Breaking Changes This is version 2.x. You might not want this, since it introduces a number of breaking changes from rapidxml. The rapidxml-like library is available, with breaking changes, by including `rapidxml.hpp` as before, within the `rapidxml` namespace - however this is an alias to the `flxml` namespace defined in `flxml.h`. 
It has breaking changes, the largest of which are: * No more case-insensitive option. Really, nobody should be using XML case insensitively anyway, but it was too difficult to keep around, sorry. * Instead of passing around potentially unterminated character pointers with optional lengths, we now use std::basic_string_view. * There is no need for string termination now, so the parse function never writes terminators into the buffer, and that option has vanished. * Return values that were previously bare pointers are now a safe wrapped pointer which ordinarily will check/throw for nullptr. * append/prepend/insert_node now also have an append/prepend/insert_element shorthand, which will allow an XML namespace to be included if wanted. * Parsing data can be done from a container as well as a NUL-terminated buffer. A NUL-terminated buffer remains slightly faster, and will be used if possible (for example, if you pass in a std::basic_string, it'll call c_str() on it and do that). Not breaking, but kind of nice: * The parse buffer is now treated as const, and will never be mutated. This incurs a slight performance penalty for handling long text values that have an encoded entity late in the string. * The iterators library is now included by default, and updated to handle most idiomatic modern C++ operations. Internal changes: * There is no longer an internal::measure or internal::compare; these just use the std::char_traits functions as used by the string_views. * Reserialization (that is, using the rapidxml::print family on a tree that is mostly or entirely from parsing) is now much faster, and will optimize itself to use simple buffer copies where the data is unchanged from parsing. * Alignment of the allocator uses C++11's alignof/std::align, and so should be more portable. 
New features: * Instead of the `doc->allocate_node` / `node->append_node` dance, you can now `node->append_element(name, value)`, where `name` can be either a `string` (or `string_view`, etc.) or a tuple like {xmlns, local_name}, which will set an xmlns attribute if needed. * There's an xpathish thing going on in `flxml/predicates.h`, which lets you search for (or iterate through) elements using a trivial subset of XPath. * You can get access to containerish things in rapidxml_iterators by methods on nodes/documents, as `node.children()`, `node.attributes()` and a new `node.descendants()`. ### Fun The rapidxml_iterators library is now included in `flxml.h`, and you can do amusing things like: ```c++ for (auto & child : node.children()) { if (child.name() == "potato") scream_for(joy); } ``` More in [test/src/iterators.cpp](./test/src/iterators.cpp) Of course, in this case it might be simpler to: ```c++ auto xp = flxml::xpath<>::parse("/potato"); for (auto & child : xp->all(node)) { scream_for(joy); } ``` More of that in [test/src/xpath.cpp](./test/src/xpath.cpp) For those of us who lose track of the buffer sometimes, clone_node() now takes an optional second argument of "true" if you want to also clone the strings. Otherwise, nodes will use string_views which reference the original parsed buffer. ### Gotchas The functions like find_node and name(...) that took a Ch * and optional length now take only a std::basic_string_view. Typical usage passed in 0, NULL, or nullptr for unwanted values; this will now segfault on C++20 and earlier. Ideally use C++23 (where constructing a string_view from nullptr is rejected at compile time); either way, pass in {} instead. This should probably be a std::optional<std::basic_string_view<Ch>> instead. ## Changes to the original I needed a library for fast XMPP processing (reading, processing, and reserializing), and this mostly fit the bill. However, not entirely, so this version adds: * XML Namespace support * An additional parse mode flag for doing shallow parsing. * An additional parse mode flag for extracting just one (child) element. 
## Tests The other thing this fork added was a file of simple tests, which I've recently rewritten into GoogleTest. The original makes reference to an expansive test suite, but this was not included in the open-source release. I'll expand these tests as and when I need to. The tests use a driver which can optionally use Sentry for performance/error tracking; to enable, use the CMake option RAPIDXML_SENTRY, and clone the [sentry-native](https://github.com/getsentry/sentry-native) repository into the root, and when running `rapidxml-test`, set SENTRY_DSN in the environment. The tests are in a different Conan package, to keep things light and simple. ## Pull Requests Pull requests are very welcome, but do ensure you're happy with the licensing first. ================================================ FILE: conanfile.py ================================================ from conan import ConanFile from conan.tools.files import copy class FLXML(ConanFile): name = "flxml" exports_sources = "include/*" no_copy_source = True def package(self): copy(self, "include/*.hpp", self.source_folder, self.package_folder) copy(self, "include/*.h", self.source_folder, self.package_folder) def package_info(self): self.cpp_info.includedirs = ['include'] self.cpp_info.libdirs = [] self.cpp_info.bindirs = [] ================================================ FILE: include/flxml/generator.h ================================================ // // Created by dave on 29/07/2024. 
// #ifndef RAPIDXML_RAPIDXML_GENERATOR_HPP #define RAPIDXML_RAPIDXML_GENERATOR_HPP #include #include namespace flxml { template class generator { public: using value_pointer = std::remove_reference::type *; struct handle_type; struct promise_type { value_pointer value; std::suspend_always yield_value(T & v) { value = &v; return {}; } std::suspend_never initial_suspend() { return {}; } std::suspend_always final_suspend() noexcept { return {}; // Change this to std::suspend_always } void return_void() {} void unhandled_exception() { std::terminate(); } generator get_return_object() { return generator{handle_type{handle_type::from_promise(*this)}}; } }; struct handle_type : std::coroutine_handle { explicit handle_type(std::coroutine_handle && h) : std::coroutine_handle(std::move(h)) {} T &operator*() { return *(this->promise().value); } void operator++() { this->resume(); } bool operator!=(std::default_sentinel_t) const { return !this->done(); } }; explicit generator(handle_type h) : m_handle(h) {} ~generator() { if (m_handle) m_handle.destroy(); } handle_type begin() { return m_handle; } std::default_sentinel_t end() { return std::default_sentinel; } private: handle_type m_handle{}; }; } #endif //RAPIDXML_RAPIDXML_GENERATOR_HPP ================================================ FILE: include/flxml/iterators.h ================================================ #ifndef RAPIDXML_ITERATORS_HPP_INCLUDED #define RAPIDXML_ITERATORS_HPP_INCLUDED // Copyright (C) 2006, 2009 Marcin Kalicinski // Version 1.13 // Revision $DateTime: 2009/05/13 01:46:17 $ //! \file rapidxml_iterators.hpp This file contains rapidxml iterators #include namespace flxml { //! 
Iterator of child nodes of xml_node template class node_iterator { public: using value_type = xml_node; using reference = xml_node &; using pointer = xml_node *; using iterator_category = std::bidirectional_iterator_tag; using difference_type = long; node_iterator() : m_node() { } explicit node_iterator(xml_node const &node) : m_node(node.first_node()) { } node_iterator(node_iterator && other) noexcept : m_node(other.m_node) {} node_iterator(node_iterator const & other) : m_node(other.m_node) {} reference operator *() const { return const_cast(*m_node); } pointer operator->() const { return const_cast(m_node.get()); } node_iterator& operator++() { m_node = m_node->next_sibling(); return *this; } node_iterator operator++(int) { node_iterator tmp = *this; ++(*this); return tmp; } node_iterator& operator--() { m_node = m_node->previous_sibling(); return *this; } node_iterator operator--(int) { node_iterator tmp = *this; --(*this); return tmp; } bool operator == (const node_iterator& rhs) const { return m_node == rhs.m_node; } bool operator != (const node_iterator& rhs) const { return m_node != rhs.m_node; } node_iterator & operator = (node_iterator && other) noexcept { m_node = other.m_node; return *this; } node_iterator & operator = (node_iterator const & other) { m_node = other.m_node; return *this; } bool valid() { return m_node.has_value(); } private: optional_ptr> m_node; }; //! 
Iterator of child nodes of xml_node template class descendant_iterator { public: using value_type = xml_node; using reference = xml_node &; using pointer = xml_node *; using iterator_category = std::bidirectional_iterator_tag; using difference_type = long; descendant_iterator() : m_parent(), m_node() { } explicit descendant_iterator(xml_node::ptr node) : m_parent(node), m_node(node->first_node()) { } descendant_iterator(descendant_iterator && other) noexcept : m_parent(other.m_parent), m_node(other.m_node) {} descendant_iterator(descendant_iterator const & other) : m_parent(other.m_parent), m_node(other.m_node) {} reference operator *() const { return const_cast(*m_node); } pointer operator->() const { return const_cast(m_node.get()); } descendant_iterator& operator++() { if (m_node->first_node()) { m_node = m_node->first_node(); } else if (m_node->next_sibling()) { m_node = m_node->next_sibling(); } else { // Run out of children, so move upward until we can find a sibling. while (true) { m_node = m_node->parent(); if (m_node == m_parent) { m_node = nullptr; break; } if (m_node->next_sibling()) { m_node = m_node->next_sibling(); break; } } } return *this; } descendant_iterator operator++(int) { node_iterator tmp = *this; ++(*this); return tmp; } descendant_iterator& operator--() { if (!m_node->previous_sibling()) { m_node = m_node->parent(); if (m_node == m_parent) { m_node = nullptr; } } else { m_node = m_node->previous_sibling(); while (m_node->last_node()) { m_node = m_node->last_node(); } } return *this; } descendant_iterator operator--(int) { node_iterator tmp = *this; --(*this); return tmp; } bool operator == (const descendant_iterator& rhs) const { return m_node == rhs.m_node; } bool operator != (const descendant_iterator& rhs) const { return m_node != rhs.m_node; } descendant_iterator & operator = (descendant_iterator && other) noexcept { m_parent = other.m_parent; m_node = other.m_node; return *this; } descendant_iterator & operator = (descendant_iterator 
const & other) { m_parent = other.m_parent; m_node = other.m_node; return *this; } bool valid() { return m_node.has_value(); } private: optional_ptr> m_parent; optional_ptr> m_node; }; //! Iterator of child attributes of xml_node template class attribute_iterator { public: using value_type = xml_attribute; using reference = xml_attribute &; using pointer = xml_attribute *; using iterator_category = std::bidirectional_iterator_tag; using difference_type = long; attribute_iterator() : m_attribute() { } explicit attribute_iterator(xml_node const &node) : m_attribute(node.first_attribute()) { } attribute_iterator(attribute_iterator && other) noexcept : m_attribute(other.m_attribute) {} attribute_iterator(attribute_iterator const & other) : m_attribute(other.m_attribute) {} reference operator *() const { return const_cast(*m_attribute); } pointer operator->() const { return const_cast(m_attribute.get()); } attribute_iterator& operator++() { m_attribute = m_attribute->next_attribute(); return *this; } attribute_iterator operator++(int) { attribute_iterator tmp = *this; ++*this; return tmp; } attribute_iterator& operator--() { m_attribute = m_attribute->previous_attribute(); return *this; } attribute_iterator operator--(int) { attribute_iterator tmp = *this; --*this; return tmp; } bool operator ==(const attribute_iterator &rhs) const { return m_attribute == rhs.m_attribute; } bool operator !=(const attribute_iterator &rhs) const { return m_attribute != rhs.m_attribute; } attribute_iterator & operator = (attribute_iterator && other) noexcept { m_attribute = other.m_attribute; return *this; } attribute_iterator & operator = (attribute_iterator const & other) { m_attribute = other.m_attribute; return *this; } private: optional_ptr> m_attribute; }; //! 
Container adaptor for child nodes template class children { xml_node const & m_node; public: explicit children(xml_node const & node) : m_node(node) {} explicit children(optional_ptr> const ptr) : m_node(ptr.value()) {} children(children && other) noexcept : m_node(other.m_node) {} children(children const & other) : m_node(other.m_node) {} using const_iterator = node_iterator; using iterator = node_iterator; iterator begin() { return iterator(m_node); } iterator end() { return {}; } const_iterator begin() const { return const_iterator(m_node); } const_iterator end() const { return {}; } }; //! Container adaptor for child nodes template class descendants { xml_node & m_node; public: explicit descendants(xml_node & node) : m_node(node) {} explicit descendants(optional_ptr> ptr) : m_node(ptr.value()) {} descendants(descendants && other) noexcept : m_node(other.m_node) {} descendants(descendants const & other) : m_node(other.m_node) {} using const_iterator = descendant_iterator; using iterator = descendant_iterator; iterator begin() { return iterator(&m_node); } iterator end() { return {}; } const_iterator begin() const { return const_iterator(&m_node); } const_iterator end() const { return {}; } }; //! 
Container adaptor for attributes template class attributes { xml_node const & m_node; public: explicit attributes(xml_node const & node) : m_node(node) {} explicit attributes(optional_ptr> ptr) : m_node(ptr.value()) {} using const_iterator = attribute_iterator; using iterator = attribute_iterator; iterator begin() { return iterator{m_node}; } iterator end() { return {}; } const_iterator begin() const { return const_iterator{m_node}; } const_iterator end() const { return {}; } }; } template inline constexpr bool std::ranges::enable_borrowed_range> = true; template inline constexpr bool std::ranges::enable_borrowed_range> = true; #endif ================================================ FILE: include/flxml/predicates.h ================================================ // // Created by dave on 29/07/2024. // #ifndef RAPIDXML_RAPIDXML_PREDICATES_HPP #define RAPIDXML_RAPIDXML_PREDICATES_HPP #include #include #include #include namespace flxml { template class xpath; namespace internal { template class xpath_base; template class name : public flxml::internal::xpath_base { private: std::basic_string m_name; std::optional> m_xmlns; public: explicit name(std::basic_string_view n) : xpath_base(), m_name(n) {} explicit name(std::basic_string const & xmlns, std::basic_string_view n) : xpath_base(), m_name(n), m_xmlns(xmlns) {} bool do_match(const xml_node & t) override { if (m_xmlns.has_value() && t.xmlns() != m_xmlns.value()) return false; return (t.type() == node_type::node_element) && (t.name() == m_name || m_name == "*"); } }; template class value : public flxml::internal::xpath_base { private: std::basic_string m_value; public: explicit value(std::basic_string_view v) : xpath_base(), m_value(v) {} bool do_match(const xml_node & t) override { return (t.type() == node_type::node_element) && (t.value() == m_value); } }; template class xmlns : public flxml::internal::xpath_base { private: std::basic_string m_xmlns; public: explicit xmlns(std::basic_string_view v) : xpath_base(), 
m_xmlns(v) {} bool do_match(const xml_node & t) override { return (t.type() == node_type::node_element) && (t.xmlns() == m_xmlns); } }; template class attr : public flxml::internal::xpath_base { private: std::basic_string m_name; std::basic_string m_value; std::optional> m_xmlns; public: explicit attr(std::basic_string_view n, std::basic_string_view v) : xpath_base(), m_name(n), m_value(v) {} explicit attr(std::basic_string const & x, std::basic_string_view n, std::basic_string_view v) : xpath_base(), m_name(n), m_value(v), m_xmlns(x) {} bool do_match(const xml_node & t) override { if (t.type() != node_type::node_element) return false; for (auto const & attr : t.attributes()) { if (m_xmlns.has_value()) { if (m_name == "*" || attr.local_name() != m_name) continue; if (attr.xmlns() != m_xmlns.value()) continue; } else { if (m_name == "*" || attr.name() != m_name) continue; } return attr.value() == m_value; } return false; } }; template class root : public flxml::internal::xpath_base { public: root() = default; generator &> do_gather(xml_node & t) override { for (auto & x : t.children()) { co_yield x; } } bool do_match(const xml_node & t) override { return t.type() == node_type::node_document || t.type() == node_type::node_element; } }; template class any : public flxml::internal::xpath_base { public: any() = default; generator &> do_gather(xml_node & t) override { co_yield t; // self for (auto & x : t.descendants()) { co_yield x; } } bool do_match(const xml_node & t) override { return t.type() == node_type::node_document || t.type() == node_type::node_element; } }; template class xpath_base { private: std::list>> m_contexts; public: xpath_base() = default; virtual ~xpath_base() = default; virtual generator &> do_gather(xml_node & t) { co_yield t; } generator &> gather(xml_node & t) { for (auto & x : do_gather(t)) { if (match(x)) co_yield x; } } virtual bool do_match(const xml_node & t) = 0; bool match(xml_node & t) { if (!do_match(t)) { return false; } for(auto & 
context : m_contexts) { if (!context->first(t)) { return false; } } return true; } void context(std::unique_ptr> && xp) { m_contexts.emplace_back(std::move(xp)); } auto & contexts() const { return m_contexts; } }; std::map xmlns_empty = {}; } template class xpath : public internal::xpath_base { private: std::vector>> m_chain; std::map const & m_xmlns; public: bool do_match(const xml_node & t) override { return false; } auto const & chain() const { return m_chain; } std::string const & prefix_lookup(std::basic_string_view const & prefix) const { std::basic_string p{prefix}; auto it = m_xmlns.find(p); if (it != m_xmlns.end()) { return (*it).second; } throw std::runtime_error("XPath contains unknown prefix"); } static void parse_predicate(std::basic_string_view const &name, xpath &xp, bool inner) { using xml_doc = xml_document; if (name.starts_with('@')) { std::basic_string text = "(text); auto attr = doc.first_node()->first_attribute(); auto colon = attr->name().find(':'); if (colon != xml_attribute::view_type::npos) { auto const & uri = xp.prefix_lookup(attr->name().substr(0, colon)); xp.m_chain.push_back(std::make_unique>(uri, attr->local_name(), attr->value())); } else { xp.m_chain.push_back(std::make_unique>(star ? 
"*" : attr->name(), attr->value())); } } else if (name.starts_with("text()")) { // text match std::basic_string text = "(text); auto attr = doc.first_node()->first_attribute(); xp.m_chain.push_back(std::make_unique>(attr->value())); } else if (name.starts_with("namespace-uri()")) { // text match std::basic_string text = "(text); auto attr = doc.first_node()->first_attribute(); xp.m_chain.push_back(std::make_unique>(attr->value())); } else { if (xp.m_chain.empty() && inner) { xp.m_chain.push_back(std::make_unique>()); } auto colon = name.find(':'); if (colon != std::basic_string_view::npos) { auto const & uri = xp.prefix_lookup(name.substr(0, colon)); xp.m_chain.push_back(std::make_unique>(uri, name.substr(colon + 1))); } else { xp.m_chain.push_back(std::make_unique>(name)); } } }

        // Parses one segment of an XPath expression from the front of `view` and
        // appends the resulting step(s) to xp.m_chain. `view` is consumed in place.
        //
        // xmlns  Prefix map, forwarded to parse_cont() for bracketed sub-expressions.
        // view   Remaining, unparsed expression text; shortened as input is consumed.
        // xp     Expression object whose chain is being built.
        // first  True for the first segment of an expression.
        // inner  True when parsing inside a [...] context expression.
        //
        // Returns true when this (sub-)expression is finished: either the input ran
        // out or a closing ']' was consumed. Returns false when more segments
        // follow (a '/' separator was reached or a '[...]' context was just parsed).
        //
        // Throws std::runtime_error on an empty name before '/' or ']', and on a
        // ']' encountered while not inside a context expression.
        static bool parse_inner(std::map & xmlns, std::basic_string_view &view, xpath &xp, bool first=false, bool inner=false) {
            // Leading axis: "//", "/", or (for the first top-level segment only) an
            // implicit step. NOTE(review): the concrete step types passed to
            // make_unique are not visible in this view.
            if (view.starts_with("//")) {
                xp.m_chain.push_back(std::make_unique>());
                view.remove_prefix(2);
            } else if (view.starts_with('/')) {
                xp.m_chain.push_back(std::make_unique>());
                view.remove_prefix(1);
            } else if (first && !inner) {
                xp.m_chain.push_back(std::make_unique>());
            }
            // Scan for the first delimiter ('/', '[' or ']').
            for (typename std::basic_string_view::size_type i = 0; i != view.size(); ++i) {
                // First pass: hand the text preceding the delimiter to parse_predicate().
                // The cases fall through on purpose: '/' and ']' additionally reject
                // an empty name, while '[' tolerates one (i == 0).
                switch (view[i]) {
                    case '/':
                    case ']':
                        if (i == 0) throw std::runtime_error("Empty name?");
                    case '[':
                        if (i != 0) parse_predicate(view.substr(0, i), xp, inner);
                }
                // Second pass: consume the delimiter and decide how to continue.
                switch (view[i]) {
                    case ']':
                        // End of a context expression; the ']' itself is consumed
                        // before the validity check.
                        view.remove_prefix(i + 1);
                        if (!inner) throw std::runtime_error("Unexpected ] in input");
                        return true;
                    case '[':
                        // Parse the bracketed sub-expression and attach it as the
                        // context of the most recently added step.
                        view.remove_prefix(i + 1);
                        xp.m_chain[xp.m_chain.size() - 1]->context(parse_cont(xmlns, view));
                        return false;
                    case '/':
                        // Leave the '/' in place so the next call sees the axis.
                        view.remove_prefix(i );
                        return false;
                }
            }
            // No delimiter found: whatever is left is a single final name/predicate.
            if (!view.empty()) {
                parse_predicate(view, xp, inner);
                view.remove_prefix(view.length());
            }
            return true;
        }

        // Parses a context (bracketed) sub-expression starting at the front of
        // `view`, consuming input until parse_inner() reports completion or the
        // input is exhausted. Returns the newly built expression.
        // Throws std::runtime_error if `view` is empty on entry.
        static std::unique_ptr> parse_cont(std::map & xmlns, std::basic_string_view &view) {
            if (view.empty()) throw std::runtime_error("Context expression is empty");
            auto xp = std::make_unique>(xmlns);
            // First segment is parsed with first=true; remaining ones with first=false.
            if (!parse_inner(xmlns, view, *xp, true, true)) {
                while (!view.empty()) {
                    if (parse_inner(xmlns, view, *xp, false, true)) break;
                }
            }
            return xp;
        }

        // Parses a complete top-level XPath expression, consuming `view` in place.
        // Same driving loop as parse_cont(), but with inner=false.
        // Throws std::runtime_error if `view` is empty on entry.
        static std::unique_ptr> parse(std::map & xmlns, std::basic_string_view &view) {
            if (view.empty()) throw std::runtime_error("XPath expression is empty");
            auto xp = std::make_unique>(xmlns);
            if (!parse_inner(xmlns, view, *xp, true, false)) {
                while (!view.empty()) {
                    if (parse_inner(xmlns, view, *xp, false, false)) break;
                }
            }
            return xp;
        }

        // Convenience overloads: each makes a local, mutable string_view copy of its
        // argument (the core parse() consumes its view) and forwards to it.
        static std::unique_ptr> parse(std::map & xmlns, std::basic_string_view const &view) {
            std::basic_string_view sv(view);
            return parse(xmlns, sv);
        }
        static std::unique_ptr> parse(std::map & xmlns, std::basic_string const &view) {
            std::basic_string_view sv(view);
            return parse(xmlns, sv);
        }
        static std::unique_ptr> parse(std::map & xmlns, const char * view) {
            std::basic_string_view sv(view);
            return parse(xmlns, sv);
        }

        // Overloads without a prefix map: these pass internal::xmlns_empty.
        static std::unique_ptr> parse(std::basic_string_view &sv) {
            return parse(internal::xmlns_empty, sv);
        }
        static std::unique_ptr> parse(std::basic_string const &view) {
            std::basic_string_view sv(view);
            return parse(internal::xmlns_empty, sv);
        }
        static std::unique_ptr> parse(std::basic_string_view const &view) {
            std::basic_string_view sv(view);
            return parse(internal::xmlns_empty, sv);
        }
        static std::unique_ptr> parse(const char * view) {
            std::basic_string_view sv(view);
            return parse(internal::xmlns_empty, sv);
        }

        // Constructs an empty expression bound to the given prefix map.
        explicit xpath(std::map & xmlns) : m_xmlns(xmlns) {}

        // Lazily yields every node matched by the chain, starting at chain index
        // `depth` and evaluated relative to `current`.
        // Each result gathered by the step at `depth` is yielded directly when
        // that step is the last one; otherwise the remaining steps are applied to
        // it recursively.
        // Throws std::logic_error if `depth` is not a valid chain index (which
        // includes the case of an empty chain).
        flxml::generator &> all(xml_node & current, unsigned int depth = 0) {
            if (depth >= m_chain.size()) throw std::logic_error("Depth exceeded");
            auto & xp = m_chain[depth];
            depth++; // from here on, `depth` is the index of the *next* step
            for (auto & r : xp->gather(current)) {
                if (depth >= m_chain.size()) {
                    co_yield r;
                } else {
                    for (auto & t : all(r, depth)) {
                        co_yield t;
                    }
                }
            }
        }

        // Returns a pointer to the first node produced by all(current), or an
        // empty (value-initialized) pointer when there is no match.
        xml_node::ptr first(xml_node & current) {
            for (auto &r: all(current)) {
                return &r;
            }
            return {};
        }
    };
}

#endif //RAPIDXML_RAPIDXML_PREDICATES_HPP
================================================ FILE: include/flxml/print.h ================================================ #ifndef RAPIDXML_PRINT_HPP_INCLUDED #define RAPIDXML_PRINT_HPP_INCLUDED // Copyright (C) 2006, 2009 Marcin Kalicinski // Version 1.13 // Revision $DateTime: 2009/05/13 01:46:17 $ //! \file rapidxml_print.hpp This file contains rapidxml printer implementation #include // Only include streams if not disabled #ifndef FLXML_NO_STREAMS #include #include #endif namespace flxml { /////////////////////////////////////////////////////////////////////// // Printing flags const int print_no_indenting = 0x1; //!< Printer flag instructing the printer to suppress indenting of XML. See print() function. /////////////////////////////////////////////////////////////////////// // Internal //! \cond internal namespace internal { /////////////////////////////////////////////////////////////////////////// // Internal character operations // Copy characters from given range to given output iterator template inline OutIt copy_chars(const Ch *begin, const Ch *end, OutIt out) { while (begin != end) *out++ = *begin++; return out; } template inline OutIt copy_chars(std::basic_string_view const & sv, OutIt out) { return copy_chars(sv.data(), sv.data() + sv.size(), out); } // Copy characters from given range to given output iterator and expand // characters into references (< > ' " &) template inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, OutIt out) { while (begin != end) { if (*begin == noexpand) { *out++ = *begin; // No expansion, copy character } else { switch (*begin) { case Ch('<'): *out++ = Ch('&'); *out++ = Ch('l'); *out++ = Ch('t'); *out++ = Ch(';'); break; case Ch('>'): *out++ = Ch('&'); *out++ = Ch('g'); *out++ = Ch('t'); *out++ = Ch(';'); break; case Ch('\''): *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('p'); *out++ = Ch('o'); *out++ = Ch('s'); *out++ = Ch(';'); break; case Ch('"'): *out++ = Ch('&'); *out++ = Ch('q'); 
*out++ = Ch('u'); *out++ = Ch('o'); *out++ = Ch('t'); *out++ = Ch(';'); break; case Ch('&'): *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('m'); *out++ = Ch('p'); *out++ = Ch(';'); break; default: *out++ = *begin; // No expansion, copy character } } ++begin; // Step to next character } return out; } template inline OutIt copy_and_expand_chars(std::basic_string_view const & sv, Ch noexpand, OutIt out) { return copy_and_expand_chars(sv.data(), sv.data() + sv.size(), noexpand, out); } // Fill given output iterator with repetitions of the same character template inline OutIt fill_chars(OutIt out, int n, Ch ch) { for (int i = 0; i < n; ++i) *out++ = ch; return out; } // Find character template inline bool find_char(const Ch *begin, const Ch *end) { while (begin != end) if (*begin++ == ch) return true; return false; } /////////////////////////////////////////////////////////////////////////// // Internal printing operations // Print node template inline OutIt print_node(OutIt out, const optional_ptr> node, int flags, int indent); // Print children of the node template inline OutIt print_children(OutIt out, const optional_ptr> node, int flags, int indent) { for (auto child = node->first_node(); child; child = child->next_sibling()) out = print_node(out, child, flags, indent); return out; } // Print attributes of the node template inline OutIt print_attributes(OutIt out, const optional_ptr> node, int) { for (auto attribute = node->first_attribute(); attribute; attribute = attribute->next_attribute()) { if (!(attribute->name().empty()) || attribute->value_raw().empty()) { // Print attribute name *out = Ch(' '), ++out; out = copy_chars(attribute->name(), out); *out = Ch('='), ++out; if (attribute->quote() && !attribute->value_decoded()) { // Shortcut here; just dump out the raw value. 
*out++ = attribute->quote(); out = copy_chars(attribute->value_raw(), out); **out++ = attribute->quote(); } else { // Print attribute value using appropriate quote type if (attribute->value().find('"') != std::basic_string_view::npos) { *out = Ch('\''), ++out; out = copy_and_expand_chars(attribute->value(), Ch('"'), out); *out = Ch('\''), ++out; } else { *out = Ch('"'), ++out; out = copy_and_expand_chars(attribute->value(), Ch('\''), out); *out = Ch('"'), ++out; } } } } return out; } // Print data node template inline OutIt print_data_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_data); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); if (!node->value_decoded()) { out = copy_chars(node->value_raw(), out); } else { out = copy_and_expand_chars(node->value(), Ch(0), out); } return out; } // Print data node template inline OutIt print_cdata_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_cdata); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); *out = Ch('<'); ++out; *out = Ch('!'); ++out; *out = Ch('['); ++out; *out = Ch('C'); ++out; *out = Ch('D'); ++out; *out = Ch('A'); ++out; *out = Ch('T'); ++out; *out = Ch('A'); ++out; *out = Ch('['); ++out; out = copy_chars(node->value(), out); *out = Ch(']'); ++out; *out = Ch(']'); ++out; *out = Ch('>'); ++out; return out; } // Print element node template inline OutIt print_element_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_element); // Print element name and attributes, if any if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); *out = Ch('<'), ++out; if (!node->prefix().empty()) { out = copy_chars(node->prefix(), out); *out = Ch(':'); ++out; } out = copy_chars(node->name(), out); out = print_attributes(out, node, flags); // If node is childless if (node->value().empty() && 
!node->first_node()) { // Print childless node tag ending *out = Ch('/'), ++out; *out = Ch('>'), ++out; } else { // Print normal node tag ending *out = Ch('>'), ++out; // If the node is clean, just output the contents and move on. // Can only do this if we're not indenting, otherwise pretty-print won't work. if (node->clean() && (flags & print_no_indenting)) { out = copy_chars(node->contents(), out); } else { // Test if node contains a single data node only (and no other nodes) auto child = node->first_node(); if (!child) { // If node has no children, only print its value without indenting if (!node->value_decoded()) { out = copy_chars(node->value_raw(), out); } else { out = copy_and_expand_chars(node->value(), Ch(0), out); } } else if (!child->next_sibling() && child->type() == node_type::node_data) { // If node has a sole data child, only print its value without indenting if (!child->value_decoded()) { out = copy_chars(child->value_raw(), out); } else { out = copy_and_expand_chars(child->value(), Ch(0), out); } } else { // Print all children with full indenting if (!(flags & print_no_indenting)) *out = Ch('\n'), ++out; out = print_children(out, node, flags, indent + 1); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); } } // Print node end *out = Ch('<'), ++out; *out = Ch('/'), ++out; if (!node->prefix().empty()) { out = copy_chars(node->prefix(), out); *out = Ch(':'); ++out; } out = copy_chars(node->name(), out); *out = Ch('>'), ++out; } return out; } // Print declaration node template inline OutIt print_declaration_node(OutIt out, const optional_ptr> node, int flags, int indent) { // Print declaration start if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); *out = Ch('<'), ++out; *out = Ch('?'), ++out; *out = Ch('x'), ++out; *out = Ch('m'), ++out; *out = Ch('l'), ++out; // Print attributes out = print_attributes(out, node, flags); // Print declaration end *out = Ch('?'), ++out; *out = Ch('>'), ++out; return 
out; } // Print comment node template inline OutIt print_comment_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_comment); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); *out = Ch('<'), ++out; *out = Ch('!'), ++out; *out = Ch('-'), ++out; *out = Ch('-'), ++out; out = copy_chars(node->value(), out); *out = Ch('-'), ++out; *out = Ch('-'), ++out; *out = Ch('>'), ++out; return out; } // Print doctype node template inline OutIt print_doctype_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_doctype); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); *out = Ch('<'), ++out; *out = Ch('!'), ++out; *out = Ch('D'), ++out; *out = Ch('O'), ++out; *out = Ch('C'), ++out; *out = Ch('T'), ++out; *out = Ch('Y'), ++out; *out = Ch('P'), ++out; *out = Ch('E'), ++out; *out = Ch(' '), ++out; out = copy_chars(node->value(), out); *out = Ch('>'), ++out; return out; } // Print pi node template inline OutIt print_pi_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_pi); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); *out = Ch('<'), ++out; *out = Ch('?'), ++out; out = copy_chars(node->name(), out); *out = Ch(' '), ++out; out = copy_chars(node->value(), out); *out = Ch('?'), ++out; *out = Ch('>'), ++out; return out; } // Print literal node template inline OutIt print_literal_node(OutIt out, const optional_ptr> node, int flags, int indent) { assert(node->type() == node_type::node_literal); if (!(flags & print_no_indenting)) out = fill_chars(out, indent, Ch('\t')); out = copy_chars(node->value(), out); return out; } // Print node // Print node template inline OutIt print_node(OutIt out, const optional_ptr> node, int flags, int indent) { // Print proper node type switch (node->type()) { // Document case node_document: out = print_children(out, 
node, flags, indent); break; // Element case node_element: out = print_element_node(out, node, flags, indent); break; // Data case node_data: out = print_data_node(out, node, flags, indent); break; // CDATA case node_cdata: out = print_cdata_node(out, node, flags, indent); break; // Declaration case node_declaration: out = print_declaration_node(out, node, flags, indent); break; // Comment case node_comment: out = print_comment_node(out, node, flags, indent); break; // Doctype case node_doctype: out = print_doctype_node(out, node, flags, indent); break; // Pi case node_pi: out = print_pi_node(out, node, flags, indent); break; case node_literal: out = print_literal_node(out, node, flags, indent); break; // Unknown default: assert(0); break; } // If indenting not disabled, add line break after node if (!(flags & print_no_indenting)) *out = Ch('\n'), ++out; // Return modified iterator return out; } } //! \endcond /////////////////////////////////////////////////////////////////////////// // Printing //! Prints XML to given output iterator. //! \param out Output iterator to print to. //! \param node Node to be printed. Pass xml_document to print entire document. //! \param flags Flags controlling how XML is printed. //! \return Output iterator pointing to position immediately after last character of printed text. template inline OutIt print(OutIt out, const xml_node &node, int flags = 0) { flxml::optional_ptr ptr(const_cast *>(&node)); return internal::print_node(out, ptr, flags, 0); } #ifndef RAPIDXML_NO_STREAMS //! Prints XML to given output stream. //! \param out Output stream to print to. //! \param node Node to be printed. Pass xml_document to print entire document. //! \param flags Flags controlling how XML is printed. //! \return Output stream. template inline std::basic_ostream &print(std::basic_ostream &out, const xml_node &node, int flags = 0) { print(std::ostream_iterator(out), node, flags); return out; } //! Prints formatted XML to given output stream. 
Uses default printing flags. Use print() function to customize printing process. //! \param out Output stream to print to. //! \param node Node to be printed. //! \return Output stream. template inline std::basic_ostream &operator <<(std::basic_ostream &out, const xml_node &node) { return print(out, node); } #endif } #endif ================================================ FILE: include/flxml/tables.h ================================================ // // Created by dwd on 9/7/24. // #ifndef RAPIDXML_RAPIDXML_TABLES_HPP #define RAPIDXML_RAPIDXML_TABLES_HPP #include #include /////////////////////////////////////////////////////////////////////// // Internals //! \cond internal namespace flxml::internal { // Struct that contains lookup tables for the parser struct lookup_tables { // Whitespace (space \n \r \t) static inline const std::vector lookup_whitespace = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, false, false, false, false, false, false, false, false, true , true , false, false, true , false, false, // 0 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 1 true , false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 2 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 3 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 4 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 5 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 6 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 7 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 8 false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, // 9 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // A false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // B false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // C false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // D false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // E false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false // F }; // Element name (anything but space \n \r \t / > ? \0 and :) static inline const std::vector lookup_element_name = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , false, false, true , true , false, true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , false, // 2 true , true , true , true , true , true , true , true , true , true , false, true , true , true , false, false, // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , 
true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // Node name (anything but space \n \r \t / > ? \0) static inline const std::vector lookup_node_name = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , false, false, true , true , false, true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , false, // 2 true , true , true , true , true , true , true , true , true , true , true , true , true , true , false, false, // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 
// 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // Text (i.e. PCDATA) (anything but < \0) static inline const std::vector lookup_text = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , false, true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 
true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // Text (i.e. PCDATA) that does not require processing when ws normalization is disabled // (anything but < \0 &) static inline const std::vector lookup_text_pure_no_ws = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 true , true , true , true , true , true , false, true , true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , false, true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , 
true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // Text (i.e. PCDATA) that does not require processing when ws normalizationis is enabled // (anything but < \0 & space \n \r \t) static inline const std::vector lookup_text_pure_with_ws = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , false, false, true , true , false, true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 false, true , true , true , true , true , false, true , true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , false, true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , 
true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // Attribute name (anything but space \n \r \t / < > = ? ! 
\0) static inline const std::vector lookup_attribute_name = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , false, false, true , true , false, true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 false, false, true , true , true , true , true , true , true , true , true , true , true , true , true , false, // 2 true , true , true , true , true , true , true , true , true , true , true , true , false, false, false, false, // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // Attribute data with single 
quote (anything but ' \0) static inline const std::vector lookup_attribute_data_1 = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 true , true , true , true , true , true , true , false, true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , 1 // F }; // 
Attribute data with single quote that does not require processing (anything but ' \0 &) static inline const std::vector lookup_attribute_data_1_pure = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 true , true , true , true , true , true , false, false, true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , true , true , true , 
true , true , true , true , true , true , true , 1 // F }; // Attribute data with double quote (anything but " \0) static inline const std::vector lookup_attribute_data_2 = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 true , true , false, true , true , true , true , true , true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // E true , true , true , true , true , 
true , true , true , true , true , true , true , true , true , true , 1 // F }; // Attribute data with double quote that does not require processing (anything but " \0 &) static inline const std::vector lookup_attribute_data_2_pure = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F false, true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 0 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 1 true , true , false, true , true , true , false, true , true , true , true , true , true , true , true , true , // 2 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 3 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 4 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 5 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 6 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 7 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 8 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // 9 true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // A true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // B true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // C true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , // D true , true , true , true , true , true , true , true , true , true , true , true , true , 
true , true , true , // E true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true // F }; // Digits (dec and hex, 255 denotes end of numeric character reference) static inline const std::array lookup_digits = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 1 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 2 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,255,255,255,255,255,255, // 3 255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 4 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 5 255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 6 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 7 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 8 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 9 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // A 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // B 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // C 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // D 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // E 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // F }; }; } //! \endcond #endif //RAPIDXML_RAPIDXML_TABLES_HPP ================================================ FILE: include/flxml/utils.h ================================================ #ifndef RAPIDXML_UTILS_HPP_INCLUDED #define RAPIDXML_UTILS_HPP_INCLUDED // Copyright (C) 2006, 2009 Marcin Kalicinski // Version 1.13 // Revision $DateTime: 2009/05/13 01:46:17 $ //! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities that can be useful //! in certain simple scenarios. They should probably not be used if maximizing performance is the main objective. 
#include #include #include #include #include namespace flxml { //! Represents data loaded from a file template class file { public: //! Loads file into the memory. Data will be automatically destroyed by the destructor. //! \param filename Filename to load. file(const char *filename) { using namespace std; // Open stream basic_ifstream stream(filename, ios::binary); if (!stream) throw runtime_error(string("cannot open file ") + filename); stream.unsetf(ios::skipws); // Determine stream size stream.seekg(0, ios::end); size_t size = stream.tellg(); stream.seekg(0); // Load data and add terminating 0 m_data.resize(size + 1); stream.read(&m_data.front(), static_cast(size)); m_data[size] = 0; } //! Loads file into the memory. Data will be automatically destroyed by the destructor //! \param stream Stream to load from file(std::basic_istream &stream) { using namespace std; // Load data and add terminating 0 stream.unsetf(ios::skipws); m_data.assign(istreambuf_iterator(stream), istreambuf_iterator()); if (stream.fail() || stream.bad()) throw runtime_error("error reading stream"); m_data.push_back(0); } //! Gets file data. //! \return Pointer to data of file. Ch *data() { return &m_data.front(); } //! Gets file data. //! \return Pointer to data of file. const Ch *data() const { return &m_data.front(); } //! Gets file data size. //! \return Size of file data, in characters. std::size_t size() const { return m_data.size(); } private: std::vector m_data; // File data }; //! Counts children of node. Time complexity is O(n). //! \return Number of children of node template inline std::size_t count_children(xml_node *node) { xml_node *child = node->first_node(); std::size_t count = 0; while (child) { ++count; child = child->next_sibling(); } return count; } //! Counts attributes of node. Time complexity is O(n). //! 
\return Number of attributes of node template inline std::size_t count_attributes(xml_node *node) { xml_attribute *attr = node->first_attribute(); std::size_t count = 0; while (attr) { ++count; attr = attr->next_attribute(); } return count; } } #endif ================================================ FILE: include/flxml/wrappers.h ================================================ // // Created by dave on 10/07/2024. // #ifndef RAPIDXML_RAPIDXML_WRAPPERS_HPP #define RAPIDXML_RAPIDXML_WRAPPERS_HPP #include #include #include namespace flxml { // Most of rapidxml was written to use a NUL-terminated Ch * for parsing. // This utility struct wraps a buffer to provide something that // looks mostly like a pointer, deferencing to NUL when it hits the end. // It's also a forward_iterator, so it'll work with the rage type constructors for string{_view} etc. template struct buffer_ptr { // Iterator magic typedefs using iterator_category = std::contiguous_iterator_tag; using difference_type = T::difference_type; using value_type = T::value_type; using pointer = T::const_pointer; using reference = T::const_reference; using real_it = T::const_iterator; real_it it; real_it end_it; static constexpr value_type end_char = value_type(0); explicit buffer_ptr(T const & buf) : it(buf.cbegin()), end_it(buf.cend()) {} buffer_ptr(buffer_ptr const & other) : it(other.it), end_it(other.end_it) {} buffer_ptr() = default; buffer_ptr & operator = (buffer_ptr const & other) { it = other.it; return *this; } reference validated_it(typename T::const_iterator const &it) const { if (it == end_it) return end_char; return *it; } reference operator[](int i) const { real_it it2 = it + i; if (it2 >= end_it) return end_char; return *it2; } pointer operator -> () const { if (it >= end_it) return &end_char; return &*it; } auto operator <=> (buffer_ptr other) const { return it <=> other.it; } auto operator < (buffer_ptr other) const { return it < other.it; } auto operator > (buffer_ptr other) const { return it > 
other.it; } auto operator >= (buffer_ptr other) const { return it >= other.it; } auto operator <= (buffer_ptr other) const { return it <= other.it; } buffer_ptr & operator ++() { ++it; return *this; } buffer_ptr operator ++(int) { auto old = *this; ++it; return old; } buffer_ptr & operator --() { --it; return *this; } buffer_ptr operator --(int) { auto old = *this; --it; return old; } reference operator *() const { return validated_it(it); } bool operator == (buffer_ptr const & other) const { return it == other.it; } auto operator + (difference_type n) const { buffer_ptr other(*this); other.it += n; return other; } buffer_ptr & operator += (difference_type i) { it += i; return *this; } auto operator - (difference_type n) const { buffer_ptr other(*this); other.it -= n; return other; } buffer_ptr & operator -= (difference_type i) { it -= i; return *this; } difference_type operator - (buffer_ptr const & other) const { return it - other.it; } pointer ptr() { return &*it; } }; template static auto operator + (int n, buffer_ptr it) { it.it += n; return it; } class no_such_node : std::runtime_error { public: no_such_node() : std::runtime_error("No such node") {} }; template class optional_ptr { T * m_ptr; void assert_value() const { if (m_ptr == nullptr) { throw no_such_node(); } } public: optional_ptr(std::nullptr_t) : m_ptr(nullptr) {} optional_ptr() : m_ptr(nullptr) {} optional_ptr(T * ptr) : m_ptr(ptr) {} bool has_value() const { return m_ptr != nullptr; } T & value() { assert_value(); return *m_ptr; } T * get() { assert_value(); return m_ptr; } T * operator -> () { return get(); } T & operator * () { return value(); } T * ptr_unsafe() { return m_ptr; } T const & value() const { assert_value(); return *m_ptr; } T const * get() const { assert_value(); return m_ptr; } T const * operator -> () const { return get(); } T const & operator * () const { return value(); } T const * ptr_unsafe() const { return m_ptr; } bool operator ! 
() const { return m_ptr == nullptr; } operator bool() const { return m_ptr != nullptr; } bool operator == (T * t) const { return m_ptr == t; } bool operator == (optional_ptr const & t) const { return m_ptr == t.m_ptr; } }; } #endif //RAPIDXML_RAPIDXML_WRAPPERS_HPP ================================================ FILE: include/flxml.h ================================================ #ifndef RAPIDXML_HPP_INCLUDED #define RAPIDXML_HPP_INCLUDED // Copyright (C) 2006, 2009 Marcin Kalicinski // Version 1.13 // Revision $DateTime: 2009/05/13 01:46:17 $ //! \file rapidxml.hpp This file contains rapidxml parser and DOM implementation #include #include #include // For std::size_t #include // For assert #include // For placement new #include #include #include #include #include // For std::runtime_error // On MSVC, disable "conditional expression is constant" warning (level 4). // This warning is almost impossible to avoid with certain types of templated code #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable:4127) // Conditional expression is constant #endif /////////////////////////////////////////////////////////////////////////// // RAPIDXML_PARSE_ERROR #if defined(FLXML_NO_EXCEPTIONS) #define FLXML_PARSE_ERROR(what, where) { parse_error_handler(what, where); assert(0); } #define FLML_EOF_ERROR(what, where) { parse_error_handler(what, where); assert(0); } namespace flxml { //! When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, //! this function is called to notify user about the error. //! It must be defined by the user. //!

//! This function cannot return. If it does, the results are undefined. //!

//! A very simple definition might look like this: //!
    //! void %rapidxml::%parse_error_handler(const char *what, void *where)
    //! {
    //!     std::cout << "Parse error: " << what << "\n";
    //!     std::abort();
    //! }
    //! 
//! \param what Human readable description of the error. //! \param where Pointer to character data where error was detected. void parse_error_handler(const char *what, void *where); } #else #define FLXML_PARSE_ERROR(what, where) {if (*where == Ch(0)) throw eof_error(what, nullptr); else throw parse_error(what, nullptr);} (void)0 #define FLXML_EOF_ERROR(what, where) throw eof_error(what, nullptr) namespace flxml { //! Parse error exception. //! This exception is thrown by the parser when an error occurs. //! Use what() function to get human-readable error message. //! Use where() function to get a pointer to position within source text where error was detected. //!

//! If throwing exceptions by the parser is undesirable, //! it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro before rapidxml.hpp is included. //! This will cause the parser to call rapidxml::parse_error_handler() function instead of throwing an exception. //! This function must be defined by the user. //!

//! This class derives from std::exception class. class parse_error: public std::runtime_error { public: //! Constructs parse error parse_error(const char *what, void *where) : std::runtime_error(what) , m_where(where) { } //! Gets pointer to character data where error happened. //! Ch should be the same as char type of xml_document that produced the error. //! \return Pointer to location within the parsed string where error occured. template Ch *where() const { return reinterpret_cast(m_where); } private: void *m_where; }; class eof_error : public parse_error { public: using parse_error::parse_error; }; class validation_error : public std::runtime_error { public: using std::runtime_error::runtime_error; }; class xmlns_unbound : public validation_error { public: using validation_error::validation_error; }; class duplicate_attribute : public validation_error { public: using validation_error::validation_error; }; class attr_xmlns_unbound : public xmlns_unbound { public: using xmlns_unbound::xmlns_unbound; }; class element_xmlns_unbound : public xmlns_unbound { public: using xmlns_unbound::xmlns_unbound; }; } #endif /////////////////////////////////////////////////////////////////////////// // Pool sizes #ifndef FLXML_STATIC_POOL_SIZE // Size of static memory block of memory_pool. // Define RAPIDXML_STATIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. // No dynamic memory allocations are performed by memory_pool until static memory is exhausted. #define FLXML_STATIC_POOL_SIZE (64 * 1024) #endif #ifndef FLXML_DYNAMIC_POOL_SIZE // Size of dynamic memory block of memory_pool. // Define RAPIDXML_DYNAMIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. // After the static block is exhausted, dynamic blocks with approximately this size are allocated by memory_pool. 
#define FLXML_DYNAMIC_POOL_SIZE (64 * 1024) #endif namespace flxml { // Forward declarations template class xml_node; template class xml_attribute; template class xml_document; template class children; template class descendants; template class attributes; //! Enumeration listing all node types produced by the parser. //! Use xml_node::type() function to query node type. enum class node_type { node_document, //!< A document node. Name and value are empty. node_element, //!< An element node. Name contains element name. Value contains text of first data node. node_data, //!< A data node. Name is empty. Value contains data text. node_cdata, //!< A CDATA node. Name is empty. Value contains data text. node_comment, //!< A comment node. Name is empty. Value contains comment text. node_declaration, //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes. node_doctype, //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text. node_pi, //!< A PI node. Name contains target. Value contains instructions. node_literal //!< Value is unencoded text (used for inserting pre-rendered XML). }; using enum node_type; // Import this into the rapidxml namespace as before. /////////////////////////////////////////////////////////////////////// // Parsing flags //! Parse flag instructing the parser to not create data nodes. //! Text of first data node will still be placed in value of parent element, unless rapidxml::parse_no_element_values flag is also specified. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_no_data_nodes = 0x1; //! Parse flag instructing the parser to not use text of first data node as a value of parent element. //! Can be combined with other flags by use of | operator. //! Note that child data nodes of element node take precendence over its value when printing. //! That is, if element has one or more child data nodes and a value, the value will be ignored. //! Use rapidxml::parse_no_data_nodes flag to prevent creation of data nodes if you want to manipulate data using values of elements. //!

//! See xml_document::parse() function. const int parse_no_element_values = 0x2; //! Parse flag instructing the parser to not translate entities in the source text. //! By default entities are translated, modifying source text. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_no_entity_translation = 0x8; //! Parse flag instructing the parser to disable UTF-8 handling and assume plain 8 bit characters. //! By default, UTF-8 handling is enabled. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_no_utf8 = 0x10; //! Parse flag instructing the parser to create XML declaration node. //! By default, declaration node is not created. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_declaration_node = 0x20; //! Parse flag instructing the parser to create comments nodes. //! By default, comment nodes are not created. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_comment_nodes = 0x40; //! Parse flag instructing the parser to create DOCTYPE node. //! By default, doctype node is not created. //! Although W3C specification allows at most one DOCTYPE node, RapidXml will silently accept documents with more than one. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_doctype_node = 0x80; //! Parse flag instructing the parser to create PI nodes. //! By default, PI nodes are not created. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_pi_nodes = 0x100; //! Parse flag instructing the parser to validate closing tag names. //! If not set, name inside closing tag is irrelevant to the parser. //! By default, closing tags are not validated. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_validate_closing_tags = 0x200; //! Parse flag instructing the parser to trim all leading and trailing whitespace of data nodes. //! By default, whitespace is not trimmed. //! This flag does not cause the parser to modify source text. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_trim_whitespace = 0x400; //! Parse flag instructing the parser to condense all whitespace runs of data nodes to a single space character. //! Trimming of leading and trailing whitespace of data is controlled by rapidxml::parse_trim_whitespace flag. //! By default, whitespace is not normalized. //! If this flag is specified, source text will be modified. //! Can be combined with other flags by use of | operator. //!

//! See xml_document::parse() function. const int parse_normalize_whitespace = 0x800; //! Parse flag to say "Parse only the initial element opening." //! Useful for XMLstreams used in XMPP. const int parse_open_only = 0x1000; //! Parse flag to say "Toss the children of the top node and parse off //! one element. //! Useful for parsing off XMPP top-level elements. const int parse_parse_one = 0x2000; //! Parse flag to say "Validate XML namespaces fully." //! This will generate additional errors, including unbound prefixes //! and duplicate attributes (with different prefices) const int parse_validate_xmlns = 0x4000; // Compound flags //! Parse flags which represent default behaviour of the parser. //! This is always equal to 0, so that all other flags can be simply ored together. //! Normally there is no need to inconveniently disable flags by anding with their negated (~) values. //! This also means that meaning of each flag is a negation of the default setting. //! For example, if flag name is rapidxml::parse_no_utf8, it means that utf-8 is enabled by default, //! and using the flag will disable it. //!

//! See xml_document::parse() function. [[maybe_unused]] const int parse_default = 0; //! A combination of parse flags resulting in fastest possible parsing, without sacrificing important data. //!

//! See xml_document::parse() function. const int parse_fastest = parse_no_data_nodes; //! A combination of parse flags resulting in largest amount of data being extracted. //! This usually results in slowest parsing. //!

//! See xml_document::parse() function. const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags | parse_validate_xmlns; /////////////////////////////////////////////////////////////////////// // Memory pool //! This class is used by the parser to create new nodes and attributes, without overheads of dynamic memory allocation. //! In most cases, you will not need to use this class directly. //! However, if you need to create nodes manually or modify names/values of nodes, //! you are encouraged to use memory_pool of relevant xml_document to allocate the memory. //! Not only is this faster than allocating them by using new operator, //! but also their lifetime will be tied to the lifetime of document, //! possibly simplyfing memory management. //!

//! Call allocate_node() or allocate_attribute() functions to obtain new nodes or attributes from the pool. //! You can also call allocate_string() function to allocate strings. //! Such strings can then be used as names or values of nodes without worrying about their lifetime. //! Note that there is no free() function -- all allocations are freed at once when clear() function is called, //! or when the pool is destroyed. //!

//! It is also possible to create a standalone memory_pool, and use it //! to allocate nodes, whose lifetime will not be tied to any document. //!

//! Pool maintains FLXML_STATIC_POOL_SIZE bytes of statically allocated memory.
//! Until static memory is exhausted, no dynamic memory allocations are done.
//! When static memory is exhausted, pool allocates additional blocks of memory of approximately FLXML_DYNAMIC_POOL_SIZE bytes each,
//! by using global new[] and delete[] operators.
//! This behaviour can be changed by setting custom allocation routines.
//! Use set_allocator() function to set them.
//!

//! Allocations for nodes, attributes and strings are aligned to the natural alignment (alignof) of the type being allocated.
//!

//! To obtain absolutely top performance from the parser, //! it is important that all nodes are allocated from a single, contiguous block of memory. //! Otherwise, cache misses when jumping between two (or more) disjoint blocks of memory can slow down parsing quite considerably. //! If required, you can tweak RAPIDXML_STATIC_POOL_SIZE, RAPIDXML_DYNAMIC_POOL_SIZE and RAPIDXML_ALIGNMENT //! to obtain best wasted memory to performance compromise. //! To do it, define their values before rapidxml.hpp file is included. //! \param Ch Character type of created nodes. template class memory_pool { public: //! \cond internal using alloc_func = void * (*)(std::size_t); // Type of user-defined function used to allocate memory using free_func = void (*)(void *); // Type of user-defined function used to free memory //! \endcond //! Constructs empty pool with default allocator functions. memory_pool() { init(); } memory_pool(memory_pool const &) = delete; memory_pool(memory_pool &&) = delete; //! Destroys pool and frees all the memory. //! This causes memory occupied by nodes allocated by the pool to be freed. //! Nodes allocated from the pool are no longer valid. ~memory_pool() { clear(); } using view_type = std::basic_string_view; //! Allocates a new node from the pool, and optionally assigns name and value to it. //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function //! will call rapidxml::parse_error_handler() function. //! \param type Type of node to create. //! \param name Name to assign to the node, or 0 to assign no name. //! \param value Value to assign to the node, or 0 to assign no value. //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. //! \return Pointer to allocated node. 
This pointer will never be NULL. template xml_node * allocate_node_low(Args... args) { void *memory = allocate_aligned>(); auto *node = new(memory) xml_node(args...); return node; } xml_node * allocate_node(node_type type, view_type const & name, view_type const & value) { auto * node = this->allocate_node_low(type, name); node->value(value); return node; } xml_node * allocate_node(node_type type, view_type const & name) { return this->allocate_node_low(type, name); } xml_node * allocate_node(node_type type) { return this->allocate_node_low(type); } //! Allocates a new attribute from the pool, and optionally assigns name and value to it. //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function //! will call rapidxml::parse_error_handler() function. //! \param name Name to assign to the attribute, or 0 to assign no name. //! \param value Value to assign to the attribute, or 0 to assign no value. //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. //! \return Pointer to allocated attribute. This pointer will never be NULL. template xml_attribute *allocate_attribute_low(Args... args) { void *memory = allocate_aligned>(); auto *attribute = new(memory) xml_attribute(args...); return attribute; } xml_attribute * allocate_attribute(view_type const & name, view_type const & value) { auto * attr = this->allocate_attribute_low(name); attr->value(value); return attr; } xml_attribute * allocate_attribute(view_type const & name) { return this->allocate_attribute_low(name); } xml_attribute * allocate_attribute() { return this->allocate_attribute_low(); } //! Allocates a char array of given size from the pool, and optionally copies a given string to it. //! 
If the allocation request cannot be accomodated, this function will throw std::bad_alloc. //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function //! will call rapidxml::parse_error_handler() function. //! \param source String to initialize the allocated memory with, or 0 to not initialize it. //! \param size Number of characters to allocate, or zero to calculate it automatically from source string length; if size is 0, source string must be specified and null terminated. //! \return Pointer to allocated char array. This pointer will never be NULL. template std::span allocate_span(std::basic_string_view const & source) { if (source.size() == 0) return {}; // No need to allocate. Ch *result = allocate_aligned(source.size()); for (std::size_t i = 0; i < source.size(); ++i) result[i] = source[i]; return {result, source.size()}; } template view_type allocate_string(std::basic_string_view const & source) { auto span = allocate_span(source); return {span.data(), span.size()}; } template view_type allocate_string(std::basic_string const & source) { return allocate_string(std::basic_string_view{source.data(), source.size()}); } template view_type allocate_string(const Sch * source) { return allocate_string(std::basic_string_view(source)); } view_type const & nullstr() { return m_nullstr; } view_type const & xmlns_xml() { if (m_xmlns_xml.empty()) m_xmlns_xml = allocate_string("http://www.w3.org/XML/1998/namespace"); return m_xmlns_xml; } view_type const & xmlns_xmlns() { if (m_xmlns_xmlns.empty()) m_xmlns_xmlns = allocate_string("http://www.w3.org/2000/xmlns/"); return m_xmlns_xmlns; } //! Clones an xml_node and its hierarchy of child nodes and attributes. //! Nodes and attributes are allocated from this memory pool. //! Names and values are not cloned, they are shared between the clone and the source. //! Result node can be optionally specified as a second parameter, //! in which case its contents will be replaced with cloned source node. //! 
This is useful when you want to clone entire document. //! \param source Node to clone. //! \param result Node to put results in, or 0 to automatically allocate result node //! \return Pointer to cloned node. This pointer will never be NULL. optional_ptr> clone_node(const optional_ptr> source, bool strings=false) { // Prepare result node auto result = allocate_node(source->type()); auto s = [this, strings](view_type const & sv) { return strings ? this->allocate_string(sv) : sv; }; // Clone name and value result->name(s(source->name())); result->value(s(source->value())); result->prefix(s(source->prefix())); // Clone child nodes and attributes for (auto child = source->first_node(); child; child = child->next_sibling()) result->append_node(clone_node(child, strings)); for (auto attr = source->first_attribute(); attr; attr = attr->next_attribute()) result->append_attribute(allocate_attribute(s(attr->name()), s(attr->value()))); return result; } //! Clears the pool. //! This causes memory occupied by nodes allocated by the pool to be freed. //! Any nodes or strings allocated from the pool will no longer be valid. void clear() { while (m_begin != m_static_memory.data()) { std::size_t s = sizeof(header) * 2; void * h = m_begin; std::align(alignof(header), sizeof(header), h, s); void *previous_begin = reinterpret_cast
(h)->previous_begin; if (m_free_func) m_free_func(m_begin); else delete[] reinterpret_cast(m_begin); m_begin = previous_begin; } init(); } //! Sets or resets the user-defined memory allocation functions for the pool. //! This can only be called when no memory is allocated from the pool yet, otherwise results are undefined. //! Allocation function must not return invalid pointer on failure. It should either throw, //! stop the program, or use longjmp() function to pass control to other place of program. //! If it returns invalid pointer, results are undefined. //!

//! User defined allocation functions must have the following forms:
//!     void *allocate(std::size_t size);
//!     void free(void *pointer);
//!

//! \param af Allocation function, or 0 to restore default function //! \param ff Free function, or 0 to restore default function [[maybe_unused]] void set_allocator(alloc_func af, free_func ff) { assert(m_begin == m_static_memory.data() && m_ptr == m_begin); // Verify that no memory is allocated yet m_alloc_func = af; m_free_func = ff; } private: struct header { void *previous_begin; }; void init() { m_begin = m_static_memory.data(); m_ptr = m_begin; m_space = m_static_memory.size(); } void *allocate_raw(std::size_t size) { // Allocate void *memory; if (m_alloc_func) // Allocate memory using either user-specified allocation function or global operator new[] { memory = m_alloc_func(size); assert(memory); // Allocator is not allowed to return 0, on failure it must either throw, stop the program or use longjmp } else { memory = new char[size]; #ifdef FLXML_NO_EXCEPTIONS if (!memory) // If exceptions are disabled, verify memory allocation, because new will not be able to throw bad_alloc FLXML_PARSE_ERROR("out of memory", 0); #endif } return memory; } template T *allocate_aligned(std::size_t n = 1) { auto size = n * sizeof(T); // Calculate aligned pointer if (!std::align(alignof(T), sizeof(T) * n, m_ptr, m_space)) { // If not enough memory left in current pool, allocate a new pool // Calculate required pool size (may be bigger than RAPIDXML_DYNAMIC_POOL_SIZE) std::size_t pool_size = FLXML_DYNAMIC_POOL_SIZE; if (pool_size < size) pool_size = size; // Allocate std::size_t alloc_size = sizeof(header) + (2 * alignof(header) - 2) + pool_size; // 2 alignments required in worst case: one for header, one for actual allocation void *raw_memory = allocate_raw(alloc_size); // Setup new pool in allocated memory void *new_header = raw_memory; std::align(alignof(header), sizeof(header), new_header, alloc_size); auto * h = reinterpret_cast
(new_header); h->previous_begin = m_begin; m_begin = raw_memory; m_ptr = (h + 1); m_space = alloc_size - sizeof(header); // Calculate aligned pointer again using new pool return allocate_aligned(n); } auto * result = reinterpret_cast(m_ptr); m_ptr = (result + n); m_space -= size; auto blank = reinterpret_cast(result); auto end = blank + size; while (blank != end) *blank++ = 'X'; return result; } void *m_begin = nullptr; // Start of raw memory making up current pool void *m_ptr = nullptr; // First free byte in current pool std::size_t m_space = FLXML_STATIC_POOL_SIZE; // Available space remaining std::array m_static_memory = {}; // Static raw memory alloc_func m_alloc_func = nullptr; // Allocator function, or 0 if default is to be used free_func m_free_func = nullptr; // Free function, or 0 if default is to be used view_type m_nullstr; view_type m_xmlns_xml; view_type m_xmlns_xmlns; }; /////////////////////////////////////////////////////////////////////////// // XML base //! Base class for xml_node and xml_attribute implementing common functions: //! name(), name_size(), value(), value_size() and parent(). //! \param Ch Character type to use template class xml_base { public: using view_type = std::basic_string_view; /////////////////////////////////////////////////////////////////////////// // Construction & destruction // Construct a base with empty name, value and parent xml_base() = default; explicit xml_base(view_type const & name) : m_name(name) {} xml_base(view_type const & name, view_type const & value) : m_name(name), m_value(value) {} /////////////////////////////////////////////////////////////////////////// // Node data access //! Gets name of the node. //! Interpretation of name depends on type of node. //! Note that name will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. //!

//! Use name_size() function to determine length of the name. //! \return Name of node, or empty string if node has no name. view_type const & name() const { return m_name; } //! Gets value of node. //! Interpretation of value depends on type of node. //! Note that value will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. //!

//! Use value_size() function to determine length of the value. //! \return Value of node, or empty string if node has no value. view_type const & value_raw() const { return m_value; } /////////////////////////////////////////////////////////////////////////// // Node modification //! Sets name of node to a non zero-terminated string. //! See \ref ownership_of_strings. //!

//! Note that node does not own its name or value, it only stores a pointer to it. //! It will not delete or otherwise free the pointer on destruction. //! It is reponsibility of the user to properly manage lifetime of the string. //! The easiest way to achieve it is to use memory_pool of the document to allocate the string - //! on destruction of the document the string will be automatically freed. //!

//! Size of name must be specified separately, because name does not have to be zero terminated. //! Use name(const Ch *) function to have the length automatically calculated (string must be zero terminated). //! \param name Name of node to set. Does not have to be zero terminated. //! \param size Size of name, in characters. This does not include zero terminator, if one is present. void name(view_type const & name) { m_name = name; } //! Sets value of node to a non zero-terminated string. //! See \ref ownership_of_strings. //!

//! Note that node does not own its name or value, it only stores a pointer to it. //! It will not delete or otherwise free the pointer on destruction. //! It is reponsibility of the user to properly manage lifetime of the string. //! The easiest way to achieve it is to use memory_pool of the document to allocate the string - //! on destruction of the document the string will be automatically freed. //!

//! Size of value must be specified separately, because it does not have to be zero terminated. //! Use value(const Ch *) function to have the length automatically calculated (string must be zero terminated). //!

//! If an element has a child node of type node_data, it will take precedence over element value when printing. //! If you want to manipulate data of elements using values, use parser flag rapidxml::parse_no_data_nodes to prevent creation of data nodes by the parser. //! \param value value of node to set. Does not have to be zero terminated. //! \param size Size of value, in characters. This does not include zero terminator, if one is present. void value_raw(view_type const & value) { m_value = value; } /////////////////////////////////////////////////////////////////////////// // Related nodes access //! Gets node parent. //! \return Pointer to parent node, or 0 if there is no parent. optional_ptr> parent() const { return m_parent; } protected: view_type m_name; // Name of node, or 0 if no name view_type m_value; // Value of node, or 0 if no value xml_node *m_parent = nullptr; // Pointer to parent node, or 0 if none }; //! Class representing attribute node of XML document. //! Each attribute has name and value strings, which are available through name() and value() functions (inherited from xml_base). //! Note that after parse, both name and value of attribute will point to interior of source text used for parsing. //! Thus, this text must persist in memory for the lifetime of attribute. //! \param Ch Character type to use. template class xml_attribute: public xml_base { friend class xml_node; public: using view_type = std::basic_string_view; using ptr = optional_ptr>; /////////////////////////////////////////////////////////////////////////// // Construction & destruction //! Constructs an empty attribute with the specified type. //! Consider using memory_pool of appropriate xml_document if allocating attributes manually. 
xml_attribute() = default; xml_attribute(view_type const & name) : xml_base(name) {} xml_attribute(view_type const & name, view_type const & value) : xml_base(name, value) {} void quote(Ch q) { m_quote = q; } Ch quote() const { return m_quote; } view_type const & value() const { if (m_value.has_value()) return m_value.value(); m_value = document()->decode_attr_value(this); return m_value.value(); } void value(view_type const & v) { m_value = v; this->value_raw(""); if (this->m_parent) this->m_parent->dirty_parent(); } // Return true if the value has been decoded. bool value_decoded() const { // Either we don't have a decoded value, or we do but it's identical. return !m_value.has_value() || m_value.value().data() != this->value_raw().data(); } /////////////////////////////////////////////////////////////////////////// // Related nodes access //! Gets document of which attribute is a child. //! \return Pointer to document that contains this attribute, or 0 if there is no parent document. optional_ptr> document() const { if (auto node = this->parent()) { return node->document(); } else { return nullptr; } } view_type const & xmlns() const { if (m_xmlns.has_value()) return m_xmlns.value(); auto const & name = this->name(); auto colon = name.find(':'); if (colon != view_type::npos) { auto element = this->parent(); if (element) m_xmlns = element->xmlns_lookup(name.substr(0, colon), true); } else { m_xmlns = document()->nullstr(); } return m_xmlns.value(); } //! Gets previous attribute, optionally matching attribute name. //! \param name Name of attribute to find, or 0 to return previous attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! 
\return Pointer to found attribute, or 0 if not found. optional_ptr> previous_attribute(view_type const & name = {}) const { if (name) { for (xml_attribute *attribute = m_prev_attribute; attribute; attribute = attribute->m_prev_attribute) if (name == attribute->name()) return attribute; return 0; } else return this->m_parent ? m_prev_attribute : 0; } //! Gets next attribute, optionally matching attribute name. //! \param name Name of attribute to find, or 0 to return next attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found attribute, or 0 if not found. optional_ptr> next_attribute(view_type const & name = {}) const { if (!name.empty()) { for (xml_attribute *attribute = m_next_attribute; attribute; attribute = attribute->m_next_attribute) if (attribute->name() == name) return attribute; return nullptr; } else return this->m_parent ? m_next_attribute : nullptr; } view_type const & local_name() const { if (!m_local_name.empty()) return m_local_name; auto colon = this->name().find(':'); if (colon == view_type::npos) { m_local_name = this->name(); } else { m_local_name = this->name().substr(colon + 1); } return m_local_name; } private: xml_attribute *m_prev_attribute = nullptr; // Pointer to previous sibling of attribute, or 0 if none; only valid if parent is non-zero xml_attribute *m_next_attribute = nullptr; // Pointer to next sibling of attribute, or 0 if none; only valid if parent is non-zero Ch m_quote = 0; // When parsing, this should be set to the containing quote for the value. mutable std::optional m_xmlns; mutable std::optional m_value; // This is the decoded, not raw, value. mutable view_type m_local_name; // ATTN: points inside m_name. 
}; /////////////////////////////////////////////////////////////////////////// // XML node //! Class representing a node of XML document. //! Each node may have associated name and value strings, which are available through name() and value() functions. //! Interpretation of name and value depends on type of the node. //! Type of node can be determined by using type() function. //!

//! Note that after parse, both name and value of node, if any, will point interior of source text used for parsing. //! Thus, this text must persist in the memory for the lifetime of node. //! \param Ch Character type to use. template class xml_node: public xml_base { public: using view_type = std::basic_string_view; using ptr = optional_ptr>; /////////////////////////////////////////////////////////////////////////// // Construction & destruction //! Constructs an empty node with the specified type. //! Consider using memory_pool of appropriate document to allocate nodes manually. //! \param type Type of node to construct. explicit xml_node(node_type type) : m_type(type) { } xml_node(node_type type, view_type const & name) : xml_base(name), m_type(type) {} xml_node(node_type type, view_type const & name, view_type const & value) : xml_base(name, value), m_type(type) {} /////////////////////////////////////////////////////////////////////////// // Node data access view_type const & value() const { if (m_value.has_value()) return m_value.value(); if (m_type == node_element || m_type == node_data) { m_value = document()->decode_data_value(this); } else { m_value = this->value_raw(); } return m_value.value(); } void dirty() { m_clean = false; dirty_parent(); } void dirty_parent() { if (this->m_parent) this->m_parent->dirty(); } bool clean() const { return m_clean; } void value(view_type const & v) { if (this->m_type == node_element) { // Set the first data node to the value, if one exists. for (auto node = m_first_node; node; node = node->m_next_sibling) { if (node->type() == node_data) { node->value(v); break; } } } m_value = v; this->value_raw(""); dirty(); } bool value_decoded() const { return !m_value.has_value() || m_value.value().data() != this->value_raw().data(); } //! Gets type of node. //! \return Type of node. 
node_type type() const { return m_type; } void prefix(view_type const & prefix) { m_prefix = prefix; dirty_parent(); } view_type const & prefix() const { return m_prefix; } void contents(view_type const & contents) { m_contents = contents; // Reset to clean here. m_clean = true; } view_type const & contents() const { return m_contents; } view_type const & xmlns() const { if (m_xmlns.has_value()) return m_xmlns.value(); m_xmlns = xmlns_lookup(m_prefix, false); return m_xmlns.value(); } view_type const & xmlns_lookup(view_type const & prefix, bool attribute) const { std::basic_string attrname{"xmlns"}; if (!prefix.empty()) { // Check if the prefix begins "xml". if (prefix.size() >= 3 && prefix.starts_with("xml")) { if (prefix.size() == 3) { return this->document()->xmlns_xml(); } else if (prefix.size() == 5 && prefix[3] == Ch('n') && prefix[4] == Ch('s')) { return this->document()->xmlns_xmlns(); } } attrname += ':'; attrname += prefix; } for (const xml_node * node = this; node; node = node->m_parent) { auto attr = node->first_attribute(attrname); if (attr) { return attr->value(); } } if (!prefix.empty()) { if (attribute) { throw attr_xmlns_unbound(attrname.c_str()); } else { throw element_xmlns_unbound(attrname.c_str()); } } return document()->nullstr(); } /////////////////////////////////////////////////////////////////////////// // Related nodes access //! Gets document of which node is a child. //! \return Pointer to document that contains this node, or 0 if there is no parent document. optional_ptr> document() const { auto *node = this; while (node) { if (node->type() == node_document) { return static_cast *>(const_cast *>(node)); } node = node->parent().ptr_unsafe(); } return nullptr; } flxml::children children() const { return flxml::children{*this}; } flxml::descendants descendants() const { return flxml::descendants{optional_ptr>{const_cast *>(this)}}; } flxml::attributes attributes() const { return flxml::attributes{*this}; } //! 
Gets first child node, optionally matching node name. //! \param name Name of child to find, or 0 to return first child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found child, or 0 if not found. optional_ptr> first_node(view_type const & name = {}, view_type const & asked_xmlns = {}) const { view_type xmlns = asked_xmlns; if (asked_xmlns.empty() && !name.empty()) { // No XMLNS asked for, but a name is present. // Assume "same XMLNS". xmlns = this->xmlns(); } for (xml_node *child = m_first_node; child; child = child->m_next_sibling) { if ((name.empty() || child->name() == name) && (xmlns.empty() || child->xmlns() == xmlns)) { return child; } } return nullptr; } //! Gets last child node, optionally matching node name. //! Behaviour is undefined if node has no children. //! Use first_node() to test if node has children. //! \param name Name of child to find, or 0 to return last child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found child, or 0 if not found. optional_ptr> last_node(view_type const & name = {}, view_type const & asked_xmlns = {}) const { view_type xmlns = asked_xmlns; if (asked_xmlns.empty() && !name.empty()) { // No XMLNS asked for, but a name is present. // Assume "same XMLNS". 
xmlns = this->xmlns(); } for (xml_node *child = m_last_node; child; child = child->m_prev_sibling) { if ((name.empty() || child->name() == name) && (xmlns.empty() || child->xmlns() == xmlns)) { return child; } } return nullptr; } //! Gets previous sibling node, optionally matching node name. //! Behaviour is undefined if node has no parent. //! Use parent() to test if node has a parent. //! \param name Name of sibling to find, or 0 to return previous sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found sibling, or 0 if not found. optional_ptr> previous_sibling(view_type const & name = {}, view_type const & asked_xmlns = {}) const { assert(this->m_parent); // Cannot query for siblings if node has no parent if (!name.empty()) { view_type xmlns = asked_xmlns; if (xmlns.empty() && !name.empty()) { // No XMLNS asked for, but a name is present. // Assume "same XMLNS". xmlns = this->xmlns(); } for (xml_node *sibling = m_prev_sibling; sibling; sibling = sibling->m_prev_sibling) if ((name.empty() || sibling->name() == name) && (xmlns.empty() || sibling->xmlns() == xmlns)) return sibling; return nullptr; } else return m_prev_sibling; } //! Gets next sibling node, optionally matching node name. //! Behaviour is undefined if node has no parent. //! Use parent() to test if node has a parent. //! \param name Name of sibling to find, or 0 to return next sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param xmlns Namespace of sibling to find, or 0 to return next sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! 
\param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found sibling, or 0 if not found. optional_ptr> next_sibling(view_type const & name = {}, view_type const & asked_xmlns = {}) const { assert(this->m_parent); // Cannot query for siblings if node has no parent view_type xmlns = asked_xmlns; if (xmlns.empty() && !name.empty()) { // No XMLNS asked for, but a name is present. // Assume "same XMLNS". xmlns = this->xmlns(); } for (xml_node *sibling = m_next_sibling; sibling; sibling = sibling->m_next_sibling) if ((name.empty() || sibling->name() == name) && (xmlns.empty() || sibling->xmlns() == xmlns)) return sibling; return nullptr; } //! Gets first attribute of node, optionally matching attribute name. //! \param name Name of attribute to find, or 0 to return first attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found attribute, or 0 if not found. optional_ptr> first_attribute(view_type const & name = {}, view_type const & xmlns = {}) const { for (xml_attribute *attribute = m_first_attribute; attribute; attribute = attribute->m_next_attribute) if ((name.empty() || attribute->name() == name) && (xmlns.empty() || attribute->xmlns() == xmlns)) return attribute; return nullptr; } //! Gets last attribute of node, optionally matching attribute name. //! \param name Name of attribute to find, or 0 to return last attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero //! 
\param name_size Size of name, in characters, or 0 to have size calculated automatically from string //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters //! \return Pointer to found attribute, or 0 if not found. optional_ptr> last_attribute(view_type const & name = {}, view_type const & xmlns = {}) const { for (xml_attribute *attribute = m_last_attribute; attribute; attribute = attribute->m_prev_attribute) if ((name.empty() || attribute->name() == name) && (xmlns.empty() || attribute->xmlns() == xmlns)) return attribute; return nullptr; } /////////////////////////////////////////////////////////////////////////// // Node modification //! Sets type of node. //! \param type Type of node to set. void type(node_type type) { m_type = type; dirty(); } /////////////////////////////////////////////////////////////////////////// // Node manipulation //! Allocate a new element to be added as a child at this node. //! If an XMLNS is specified via the clarke notation syntax, then the prefix will match the parent element (if any), //! and any needed xmlns attributes will be added for you. //! Strings are assumed to remain in scope - you should document()->allocate_string() any that might not. //! \param name Name of the element, either string view, string, or clarke notation protected: // These are too easy to accidentally forget to append, prepend, or insert. 
optional_ptr> allocate_element(view_type const & name) { return document()->allocate_node(node_element, name); } optional_ptr> allocate_element(std::tuple const & clark_name) { auto [xmlns, name] = clark_name; xml_node * child; if (xmlns != this->xmlns()) { child = document()->allocate_node(node_element, name); child->append_attribute(document()->allocate_attribute("xmlns", xmlns)); } else if (!this->prefix().empty()) { std::basic_string pname = std::string(this->prefix()) + ':'; pname += name; child = document()->allocate_node(node_element, document()->allocate_string(pname)); } else { child = document()->allocate_node(node_element, name); } return child; } optional_ptr> allocate_element(view_type const & name, view_type const & value) { auto child = allocate_element(name); child->value(value); return child; } optional_ptr> allocate_element(std::tuple const & clark_name, view_type const & value) { auto child = allocate_element(clark_name); child->value(value); return child; } optional_ptr> allocate_element(std::initializer_list const & clark_name) { auto it = clark_name.begin(); auto a = *it; auto b = *++it; return allocate_element({view_type(a), view_type(b)}); } optional_ptr> allocate_element(std::initializer_list const & clark_name, view_type const & value) { auto child = allocate_element(clark_name); if (!value.empty()) child->value(value); return child; } public: //! Prepends a new child node. //! The prepended child becomes the first child, and all existing children are moved one position back. //! \param child Node to prepend. 
optional_ptr> prepend_node(xml_node *child) { assert(child && !child->parent() && child->type() != node_document); dirty(); if (first_node()) { child->m_next_sibling = m_first_node; m_first_node->m_prev_sibling = child; } else { child->m_next_sibling = 0; m_last_node = child; } m_first_node = child; child->m_parent = this; child->m_prev_sibling = 0; return child; } auto prepend_node(optional_ptr> ptr) { return prepend_node(ptr.get()); } auto prepend_element(view_type const & v, view_type const & value = {}) { auto child = allocate_element(v, value); return prepend_node(child); } auto prepend_element(std::tuple const & il, view_type const & value = {}) { auto child = allocate_element(il, value); return prepend_node(child); } auto prepend_element(std::initializer_list const & il, view_type const & value = {}) { auto child = allocate_element(il, value); return prepend_node(child); } //! Appends a new child node. //! The appended child becomes the last child. //! \param child Node to append. optional_ptr> append_node(xml_node *child) { assert(child && !child->parent() && child->type() != node_document); dirty(); if (first_node()) { child->m_prev_sibling = m_last_node; m_last_node->m_next_sibling = child; } else { child->m_prev_sibling = nullptr; m_first_node = child; } m_last_node = child; child->m_parent = this; child->m_next_sibling = nullptr; return child; } optional_ptr> append_node(optional_ptr> ptr) { return append_node(ptr.get()); } auto append_element(view_type const & v, view_type const & value = {}) { auto child = allocate_element(v, value); return append_node(child); } auto append_element(std::tuple const & il, view_type const & value = {}) { auto child = allocate_element(il, value); return append_node(child); } auto append_element(std::initializer_list const & il, view_type const & value = {}) { auto child = allocate_element(il, value); return append_node(child); } //! Inserts a new child node at specified place inside the node. //! 
All children after and including the specified node are moved one position back. //! \param where Place where to insert the child, or 0 to insert at the back. //! \param child Node to insert. optional_ptr> insert_node(xml_node *where, xml_node *child) { assert(!where || where->parent() == this); assert(child && !child->parent() && child->type() != node_document); dirty(); if (where == m_first_node) prepend_node(child); else if (!where) append_node(child); else { child->m_prev_sibling = where->m_prev_sibling; child->m_next_sibling = where; where->m_prev_sibling->m_next_sibling = child; where->m_prev_sibling = child; child->m_parent = this; } return child; } auto insert_node(optional_ptr> where, optional_ptr> ptr) { return insert_node(where.ptr(), ptr.ptr()); } auto insert_element(optional_ptr> where, view_type const & v, view_type const & value = {}) { auto child = allocate_element(v, value); return insert_node(where, child); } auto insert_element(optional_ptr> where, std::tuple const & il, view_type const & value = {}) { auto child = allocate_element(il, value); return insert_node(where, child); } auto insert_element(optional_ptr> where, std::initializer_list const & il, view_type const & value = {}) { auto child = allocate_element(il, value); return insert_node(where, child); } //! Removes first child node. //! If node has no children, behaviour is undefined. //! Use first_node() to test if node has children. void remove_first_node() { assert(first_node()); dirty(); xml_node *child = m_first_node; m_first_node = child->m_next_sibling; if (child->m_next_sibling) child->m_next_sibling->m_prev_sibling = nullptr; else m_last_node = nullptr; child->m_parent = nullptr; } //! Removes last child of the node. //! If node has no children, behaviour is undefined. //! Use first_node() to test if node has children. 
void remove_last_node() { assert(first_node()); dirty(); xml_node *child = m_last_node; if (child->m_prev_sibling) { m_last_node = child->m_prev_sibling; child->m_prev_sibling->m_next_sibling = nullptr; } else m_first_node = nullptr; child->m_parent = nullptr; } //! Removes specified child from the node // \param where Pointer to child to be removed. void remove_node(optional_ptr> where) { assert(where->parent() == this); assert(first_node()); dirty(); if (where == m_first_node) remove_first_node(); else if (where == m_last_node) remove_last_node(); else { where->m_prev_sibling->m_next_sibling = where->m_next_sibling; where->m_next_sibling->m_prev_sibling = where->m_prev_sibling; where->m_parent = nullptr; } } //! Removes all child nodes (but not attributes). void remove_all_nodes() { if (!m_first_node) return; dirty(); for (xml_node *node = m_first_node; node; node = node->m_next_sibling) { node->m_parent = nullptr; } m_first_node = nullptr; m_last_node = nullptr; } //! Prepends a new attribute to the node. //! \param attribute Attribute to prepend. void prepend_attribute(xml_attribute *attribute) { assert(attribute && !attribute->parent()); dirty_parent(); if (first_attribute()) { attribute->m_next_attribute = m_first_attribute; m_first_attribute->m_prev_attribute = attribute; } else { attribute->m_next_attribute = nullptr; m_last_attribute = attribute; } m_first_attribute = attribute; attribute->m_parent = this; attribute->m_prev_attribute = nullptr; } //! Appends a new attribute to the node. //! \param attribute Attribute to append. void append_attribute(xml_attribute *attribute) { assert(attribute && !attribute->parent()); dirty_parent(); if (first_attribute()) { attribute->m_prev_attribute = m_last_attribute; m_last_attribute->m_next_attribute = attribute; } else { attribute->m_prev_attribute = nullptr; m_first_attribute = attribute; } m_last_attribute = attribute; attribute->m_parent = this; attribute->m_next_attribute = nullptr; } //! 
Inserts a new attribute at specified place inside the node. //! All attributes after and including the specified attribute are moved one position back. //! \param where Place where to insert the attribute, or 0 to insert at the back. //! \param attribute Attribute to insert. void insert_attribute(xml_attribute *where, xml_attribute *attribute) { assert(!where || where->parent() == this); assert(attribute && !attribute->parent()); dirty_parent(); if (where == m_first_attribute) prepend_attribute(attribute); else if (!where) append_attribute(attribute); else { attribute->m_prev_attribute = where->m_prev_attribute; attribute->m_next_attribute = where; where->m_prev_attribute->m_next_attribute = attribute; where->m_prev_attribute = attribute; attribute->m_parent = this; } } //! Removes first attribute of the node. //! If node has no attributes, behaviour is undefined. //! Use first_attribute() to test if node has attributes. void remove_first_attribute() { assert(first_attribute()); dirty_parent(); xml_attribute *attribute = m_first_attribute; if (attribute->m_next_attribute) { attribute->m_next_attribute->m_prev_attribute = 0; } else m_last_attribute = nullptr; attribute->m_parent = nullptr; m_first_attribute = attribute->m_next_attribute; } //! Removes last attribute of the node. //! If node has no attributes, behaviour is undefined. //! Use first_attribute() to test if node has attributes. void remove_last_attribute() { assert(first_attribute()); dirty_parent(); xml_attribute *attribute = m_last_attribute; if (attribute->m_prev_attribute) { attribute->m_prev_attribute->m_next_attribute = 0; m_last_attribute = attribute->m_prev_attribute; } else m_first_attribute = nullptr; attribute->m_parent = nullptr; } //! Removes specified attribute from node. //! \param where Pointer to attribute to be removed. 
void remove_attribute(optional_ptr> where) { assert(first_attribute() && where->parent() == this); dirty_parent(); if (where == m_first_attribute) remove_first_attribute(); else if (where == m_last_attribute) remove_last_attribute(); else { where->m_prev_attribute->m_next_attribute = where->m_next_attribute; where->m_next_attribute->m_prev_attribute = where->m_prev_attribute; where->m_parent = nullptr; } } //! Removes all attributes of node. void remove_all_attributes() { if (!m_first_attribute) return; dirty_parent(); for (xml_attribute *attribute = m_first_attribute; attribute; attribute = attribute->m_next_attribute) { attribute->m_parent = nullptr; } m_first_attribute = nullptr; } void validate() const { this->xmlns(); for (auto child = this->first_node(); child; child = child->next_sibling()) { child->validate(); } for (auto attribute = first_attribute(); attribute; attribute = attribute->m_next_attribute) { attribute->xmlns(); for (auto otherattr = first_attribute(); otherattr != attribute; otherattr = otherattr->m_next_attribute) { if (attribute->name() == otherattr->name()) { throw duplicate_attribute("Attribute doubled"); } if ((attribute->local_name() == otherattr->local_name()) && (attribute->xmlns() == otherattr->xmlns())) throw duplicate_attribute("Attribute XMLNS doubled"); } } } private: /////////////////////////////////////////////////////////////////////////// // Restrictions // No copying xml_node(const xml_node &) = delete; void operator =(const xml_node &) = delete; /////////////////////////////////////////////////////////////////////////// // Data members // Note that some of the pointers below have UNDEFINED values if certain other pointers are 0. // This is required for maximum performance, as it allows the parser to omit initialization of // unneded/redundant values. // // The rules are as follows: // 1. first_node and first_attribute contain valid pointers, or 0 if node has no children/attributes respectively // 2. 
last_node and last_attribute are valid only if node has at least one child/attribute respectively, otherwise they contain garbage // 3. prev_sibling and next_sibling are valid only if node has a parent, otherwise they contain garbage view_type m_prefix; mutable std::optional m_xmlns; // Cache node_type m_type; // Type of node; always valid xml_node *m_first_node = nullptr; // Pointer to first child node, or 0 if none; always valid xml_node *m_last_node = nullptr; // Pointer to last child node, or 0 if none; this value is only valid if m_first_node is non-zero xml_attribute *m_first_attribute = nullptr; // Pointer to first attribute of node, or 0 if none; always valid xml_attribute *m_last_attribute = nullptr; // Pointer to last attribute of node, or 0 if none; this value is only valid if m_first_attribute is non-zero xml_node *m_prev_sibling = nullptr; // Pointer to previous sibling of node, or 0 if none; this value is only valid if m_parent is non-zero xml_node *m_next_sibling = nullptr; // Pointer to next sibling of node, or 0 if none; this value is only valid if m_parent is non-zero view_type m_contents; // Pointer to original contents in buffer. bool m_clean = false; // Unchanged since parsing (ie, contents are good). mutable std::optional m_value; }; /////////////////////////////////////////////////////////////////////////// // XML document //! This class represents root of the DOM hierarchy. //! It is also an xml_node and a memory_pool through public inheritance. //! Use parse() function to build a DOM tree from a zero-terminated XML text string. //! parse() function allocates memory for nodes and attributes by using functions of xml_document, //! which are inherited from memory_pool. //! To access root node of the document, use the document itself, as if it was an xml_node. //! \param Ch Character type to use. template class xml_document: public xml_node, public memory_pool { public: using view_type = std::basic_string_view; using ptr = optional_ptr>; //! 
Constructs empty XML document xml_document() : xml_node(node_document) { } //! Parses zero-terminated XML string according to given flags. //! Passed string will be modified by the parser, unless rapidxml::parse_non_destructive flag is used. //! The string must persist for the lifetime of the document. //! In case of error, rapidxml::parse_error exception will be thrown. //!

//! If you want to parse contents of a file, you must first load the file into the memory, and pass pointer to its beginning. //! Make sure that data is zero-terminated. //!

//! Document can be parsed into multiple times. //! Each new call to parse removes previous nodes and attributes (if any), but does not clear memory pool. //! \param text XML data to parse; pointer is non-const to denote fact that this data may be modified by the parser. template auto parse(const Ch * text, xml_document * parent = nullptr) { return this->parse_low(text, parent); } template auto parse(std::basic_string const & str, xml_document * parent = nullptr) { return this->parse_low(str.c_str(), parent); } template requires std::is_same_v auto parse(C const & container, xml_document * parent = nullptr) { return this->parse_low(buffer_ptr(container), parent); } template T parse_low(T text, xml_document * parent) { this->m_parse_flags = Flags; // Remove current contents this->remove_all_nodes(); this->remove_all_attributes(); this->m_parent = parent ? parent->first_node().get() : nullptr; // Parse BOM, if any parse_bom(text); // Parse children while (true) { // Skip whitespace before node skip(text); if (*text == 0) break; // Parse and append new child if (*text == Ch('<')) { ++text; // Skip '<' if (xml_node *node = parse_node(text)) { this->append_node(node); if (Flags & (parse_open_only|parse_parse_one) && node->type() == node_element) { break; } } } else FLXML_PARSE_ERROR("expected <", text); } if (!this->first_node()) FLXML_PARSE_ERROR("no root element", text); return text; } //! Clears the document by deleting all nodes and clearing the memory pool. //! All nodes owned by document pool are destroyed. void clear() { this->remove_all_nodes(); this->remove_all_attributes(); memory_pool::clear(); } template view_type decode_data_value_low(view_type const & v) { buffer_ptr first{v}; if (Flags & parse_normalize_whitespace) { skip(first); } else { skip(first); } if (!*first) return v; auto buf = this->allocate_string(v); auto * start = buf.data(); buffer_ptr tmp{buf}; auto end = (Flags & parse_normalize_whitespace) ? 
skip_and_expand_character_refs(tmp) : skip_and_expand_character_refs(tmp); // Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after > if (Flags & parse_trim_whitespace) { if (Flags & parse_normalize_whitespace) { // Whitespace is already condensed to single space characters by skipping function, so just trim 1 char off the end if (*(end - 1) == Ch(' ')) --end; } else { // Backup until non-whitespace character is found while (whitespace_pred::test(*(end - 1))) --end; } } return {start, end}; } template view_type decode_attr_value_low(view_type const & v) { buffer_ptr first{v}; skip,0>(first); if (!*first || *first == Q) return v; auto buf = this->allocate_string(v); const Ch * start = buf.data(); buffer_ptr tmp{buf}; const Ch * end = skip_and_expand_character_refs,attribute_value_pure_pred,0>(tmp); return {start, end}; } view_type decode_attr_value(const xml_attribute * attr) { if (attr->quote() == Ch('"')) { return decode_attr_value_low<'"'>(attr->value_raw()); } else if (attr->quote() == Ch('\'')){ return decode_attr_value_low<'\''>(attr->value_raw()); } else { return attr->value_raw(); } } view_type decode_data_value(const xml_node * node) { if (node->value_raw().empty()) return node->value_raw(); if (m_parse_flags & parse_normalize_whitespace) { if (m_parse_flags & parse_trim_whitespace) { const int Flags = parse_normalize_whitespace | parse_trim_whitespace; return decode_data_value_low(node->value_raw()); } else { const int Flags = parse_normalize_whitespace; return decode_data_value_low(node->value_raw()); } } else { if (m_parse_flags & parse_trim_whitespace) { const int Flags = parse_trim_whitespace; return decode_data_value_low(node->value_raw()); } else { const int Flags = 0; return decode_data_value_low(node->value_raw()); } } } void validate() const { for (auto child = this->first_node(); child; child = child->next_sibling()) { child->validate(); } } #ifndef RAPIDXML_TESTING private: #endif 
/////////////////////////////////////////////////////////////////////// // Internal character utility functions // Detect whitespace character struct whitespace_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_whitespace[static_cast(ch)]; } }; // Detect node name character struct node_name_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_node_name[static_cast(ch)]; } }; // Detect element name character struct element_name_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_element_name[static_cast(ch)]; } }; // Detect attribute name character struct attribute_name_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_attribute_name[static_cast(ch)]; } }; // Detect text character (PCDATA) struct text_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_text[static_cast(ch)]; } }; // Detect text character (PCDATA) that does not require processing struct text_pure_no_ws_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_text_pure_no_ws[static_cast(ch)]; } }; // Detect text character (PCDATA) that does not require processing struct text_pure_with_ws_pred { static unsigned char test(Ch ch) { return internal::lookup_tables::lookup_text_pure_with_ws[static_cast(ch)]; } }; // Detect attribute value character template struct attribute_value_pred { static unsigned char test(Ch ch) { if (Quote == Ch('\'')) return internal::lookup_tables::lookup_attribute_data_1[static_cast(ch)]; if (Quote == Ch('\"')) return internal::lookup_tables::lookup_attribute_data_2[static_cast(ch)]; return 0; // Should never be executed, to avoid warnings on Comeau } }; // Detect attribute value character template struct attribute_value_pure_pred { static unsigned char test(Ch ch) { if (Quote == Ch('\'')) return internal::lookup_tables::lookup_attribute_data_1_pure[static_cast(ch)]; if (Quote == Ch('\"')) return 
internal::lookup_tables::lookup_attribute_data_2_pure[static_cast(ch)]; return 0; // Should never be executed, to avoid warnings on Comeau } }; // Insert coded character, using UTF8 or 8-bit ASCII template static void insert_coded_character(Ch *&text, unsigned long code) { if (Flags & parse_no_utf8) { // Insert 8-bit ASCII character // Todo: possibly verify that code is less than 256 and use replacement char otherwise? text[0] = static_cast(code); text += 1; } else { // Insert UTF8 sequence if (code < 0x80) // 1 byte sequence { text[0] = static_cast(code); text += 1; } else if (code < 0x800) // 2 byte sequence { text[1] = static_cast((code | 0x80) & 0xBF); code >>= 6; text[0] = static_cast(code | 0xC0); text += 2; } else if (code < 0x10000) // 3 byte sequence { text[2] = static_cast((code | 0x80) & 0xBF); code >>= 6; text[1] = static_cast((code | 0x80) & 0xBF); code >>= 6; text[0] = static_cast(code | 0xE0); text += 3; } else if (code < 0x110000) // 4 byte sequence { text[3] = static_cast((code | 0x80) & 0xBF); code >>= 6; text[2] = static_cast((code | 0x80) & 0xBF); code >>= 6; text[1] = static_cast((code | 0x80) & 0xBF); code >>= 6; text[0] = static_cast(code | 0xF0); text += 4; } else // Invalid, only codes up to 0x10FFFF are allowed in Unicode { FLXML_PARSE_ERROR("invalid numeric character entity", text); } } } // Skip characters until predicate evaluates to true template static void skip(Chp & b) { while (StopPred::test(*b)) ++b; } // Skip characters until predicate evaluates to true while doing the following: // - replacing XML character entity references with proper characters (' & " < > &#...;) // - condensing whitespace sequences to single space character template static const Ch *skip_and_expand_character_refs(Chp text) { // If entity translation, whitespace condense and whitespace trimming is disabled, use plain skip if (Flags & parse_no_entity_translation && !(Flags & parse_normalize_whitespace) && !(Flags & parse_trim_whitespace)) { skip(text); return 
&*text; } // Use simple skip until first modification is detected skip(text); // Use translation skip Chp src = text; Ch * dest = const_cast(&*src); while (StopPred::test(*src)) { // If entity translation is enabled if (!(Flags & parse_no_entity_translation)) { // Test if replacement is needed if (src[0] == Ch('&')) { switch (src[1]) { // & ' case Ch('a'): if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';')) { *dest = Ch('&'); ++dest; src += 5; continue; } if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && src[5] == Ch(';')) { *dest = Ch('\''); ++dest; src += 6; continue; } break; // " case Ch('q'): if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && src[5] == Ch(';')) { *dest = Ch('"'); ++dest; src += 6; continue; } break; // > case Ch('g'): if (src[2] == Ch('t') && src[3] == Ch(';')) { *dest = Ch('>'); ++dest; src += 4; continue; } break; // < case Ch('l'): if (src[2] == Ch('t') && src[3] == Ch(';')) { *dest = Ch('<'); ++dest; src += 4; continue; } break; // &#...; - assumes ASCII case Ch('#'): if (src[2] == Ch('x')) { unsigned long code = 0; src += 3; // Skip &#x while (true) { unsigned char digit = internal::lookup_tables::lookup_digits[static_cast(*src)]; if (digit == 0xFF) break; code = code * 16 + digit; ++src; } insert_coded_character(dest, code); // Put character in output } else { unsigned long code = 0; src += 2; // Skip &# while (true) { unsigned char digit = internal::lookup_tables::lookup_digits[static_cast(*src)]; if (digit == 0xFF) break; code = code * 10 + digit; ++src; } insert_coded_character(dest, code); // Put character in output } if (*src == Ch(';')) ++src; else FLXML_PARSE_ERROR("expected ;", src); continue; // Something else default: // Ignore, just copy '&' verbatim break; } } } // If whitespace condensing is enabled if (Flags & parse_normalize_whitespace && whitespace_pred::test(*src)) { *dest = Ch(' '); ++dest; // Put single space in dest ++src; // Skip first whitespace char // Skip remaining 
whitespace chars while (whitespace_pred::test(*src)) ++src; continue; } // No replacement, only copy character *dest++ = *src++; } // Return new end return dest; } /////////////////////////////////////////////////////////////////////// // Internal parsing functions // Parse BOM, if any template void parse_bom(Chp &texta) { Chp text = texta; // UTF-8? if (static_cast(*text++) == 0xEF && static_cast(*text++) == 0xBB && static_cast(*text++) == 0xBF) { texta = text; // Skup utf-8 bom } } // Parse XML declaration ( xml_node *parse_xml_declaration(Chp &text) { // If parsing of declaration is disabled if (!(Flags & parse_declaration_node)) { // Skip until end of declaration while (text[0] != Ch('?') || text[1] != Ch('>')) { if (!text[0]) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } text += 2; // Skip '?>' return 0; } // Create declaration xml_node *declaration = this->allocate_node(node_declaration); // Skip whitespace before attributes or ?> skip(text); // Parse declaration attributes parse_node_attributes(text, declaration); // Skip ?> if (text[0] != Ch('?') || text[1] != Ch('>')) FLXML_PARSE_ERROR("expected ?>", text); text += 2; return declaration; } // Parse XML comment (' return 0; // Do not produce comment node } // Remember value start Chp value = text; // Skip until end of comment while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) { if (!text[0]) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } // Create comment node xml_node *comment = this->allocate_node(node_comment); comment->value({value, text}); text += 3; // Skip '-->' return comment; } // Parse DOCTYPE template xml_node *parse_doctype(Chp &text) { // Remember value start Chp value = text; // Skip to > while (*text != Ch('>')) { // Determine character type switch (*text) { // If '[' encountered, scan for matching ending ']' using naive algorithm with depth // This works for all W3C test files except for 2 most wicked case Ch('['): { ++text; // Skip '[' int 
depth = 1; while (depth > 0) { switch (*text) { case Ch('['): ++depth; break; case Ch(']'): --depth; break; case 0: FLXML_PARSE_ERROR("unexpected end of data", text); default: break; } ++text; } break; } // Error on end of text case Ch('\0'): FLXML_PARSE_ERROR("unexpected end of data", text); // Other character, skip it default: ++text; } } // If DOCTYPE nodes enabled if (Flags & parse_doctype_node) { // Create a new doctype node xml_node *doctype = this->allocate_node(node_doctype); doctype->value({value, text}); text += 1; // skip '>' return doctype; } else { text += 1; // skip '>' return 0; } } // Parse PI template xml_node *parse_pi(Chp &text) { // If creation of PI nodes is enabled if (Flags & parse_pi_nodes) { // Create pi node xml_node *pi = this->allocate_node(node_pi); // Extract PI target name Chp name = text; skip(text); if (text == name) FLXML_PARSE_ERROR("expected PI target", text); pi->name({name, text}); // Skip whitespace between pi target and pi skip(text); // Remember start of pi Chp value = text; // Skip to '?>' while (text[0] != Ch('?') || text[1] != Ch('>')) { if (*text == Ch('\0')) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } // Set pi value (verbatim, no entity expansion or whitespace normalization) pi->value({value, text}); text += 2; // Skip '?>' return pi; } else { // Skip to '?>' while (text[0] != Ch('?') || text[1] != Ch('>')) { if (*text == Ch('\0')) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } text += 2; // Skip '?>' return 0; } } // Parse and append data // Return character that ends data. // This is necessary because this character might have been overwritten by a terminating 0 template Ch parse_and_append_data(xml_node *node, Chp &text, Chp contents_start) { // Backup to contents start if whitespace trimming is disabled if (!(Flags & parse_trim_whitespace)) text = contents_start; // Skip until end of data. We should check if the contents will need decoding. 
Chp value = text; bool encoded = false; skip(text); if (text_pred::test(*text)) { encoded = true; skip(text); } // If characters are still left between end and value (this test is only necessary if normalization is enabled) // Create new data node if (!(Flags & parse_no_data_nodes)) { xml_node *data = this->allocate_node(node_data); data->value_raw({value, text}); if (!encoded) data->value(data->value_raw()); node->append_node(data); } // Add data to parent node if no data exists yet if (!(Flags & parse_no_element_values)) { if (node->value_raw().empty()) { node->value_raw({value, text}); if (!encoded) node->value(node->value_raw()); } } // Return character that ends data return *text; } // Parse CDATA template xml_node *parse_cdata(Chp &text) { // If CDATA is disabled if (Flags & parse_no_data_nodes) { // Skip until end of cdata while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) { if (!text[0]) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } text += 3; // Skip ]]> return 0; // Do not produce CDATA node } // Skip until end of cdata Chp value = text; while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) { if (!text[0]) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } // Create new cdata node xml_node *cdata = this->allocate_node(node_cdata); cdata->value({value, text}); text += 3; // Skip ]]> return cdata; } // Parse element node template xml_node *parse_element(Chp &text) { // Create element node xml_node *element = this->allocate_node(node_element); // Extract element name Chp prefix = text; view_type qname; skip(text); if (text == prefix) FLXML_PARSE_ERROR("expected element name or prefix", text); if (*text == Ch(':')) { element->prefix({prefix, text}); ++text; Chp name = text; skip(text); if (text == name) FLXML_PARSE_ERROR("expected element local name", text); element->name({name, text}); } else { element->name({prefix, text}); } qname = {prefix, text}; // Skip whitespace between element name and 
attributes or > skip(text); // Parse attributes, if any parse_node_attributes(text, element); // Once we have all the attributes, we should be able to fully validate: if (Flags & parse_validate_xmlns) this->validate(); // Determine ending type if (*text == Ch('>')) { Chp contents = ++text; Chp contents_end = contents; if (!(Flags & parse_open_only)) contents_end = parse_node_contents(text, element, qname); if (contents != contents_end) element->contents({contents, contents_end}); } else if (*text == Ch('/')) { ++text; if (*text != Ch('>')) FLXML_PARSE_ERROR("expected >", text); ++text; if (Flags & parse_open_only) FLXML_PARSE_ERROR("open_only, but closed", text); } else FLXML_PARSE_ERROR("expected >", text); // Return parsed element return element; } // Determine node type, and parse it template xml_node *parse_node(Chp &text) { // Parse proper node type switch (text[0]) { // <... default: // Parse and append element node return parse_element(text); // (text); } else { // Parse PI return parse_pi(text); } // (text); } break; // (text); } break; // (text); } default: break; } // switch // Attempt to skip other, unrecognized node types starting with ')) { if (*text == 0) FLXML_PARSE_ERROR("unexpected end of data", text); ++text; } ++text; // Skip '>' return 0; // No node recognized } } // Parse contents of the node - children, data etc. // Return end pointer. template Chp parse_node_contents(Chp &text, xml_node *node, view_type const & qname) { Chp retval; // For all children and text while (true) { // Skip whitespace between > and node contents Chp contents_start = text; // Store start of node contents before whitespace is skipped skip(text); Ch next_char = *text; // After data nodes, instead of continuing the loop, control jumps here. // This is because zero termination inside parse_and_append_data() function // would wreak havoc with the above code. // Also, skipping whitespace after data nodes is unnecessary. 
after_data_node: // Determine what comes next: node closing, child node, data node, or 0? switch (next_char) { // Node closing or child node case Ch('<'): if (text[1] == Ch('/')) { // Node closing retval = text; text += 2; // Skip '(text); if (qname != view_type{closing_name, text}) FLXML_PARSE_ERROR("invalid closing tag name", text); } else { // No validation, just skip name skip(text); } // Skip remaining whitespace after node name skip(text); if (*text != Ch('>')) FLXML_PARSE_ERROR("expected >", text); ++text; // Skip '>' if (Flags & parse_open_only) FLXML_PARSE_ERROR("Unclosed element actually closed.", text); return retval; // Node closed, finished parsing contents } else { // Child node ++text; // Skip '<' if (xml_node *child = parse_node(text)) node->append_node(child); } break; // End of data - error unless we expected this. case Ch('\0'): if (Flags & parse_open_only) { return Chp(); } else { FLXML_PARSE_ERROR("unexpected end of data", text); } // Data node default: next_char = parse_and_append_data(node, text, contents_start); goto after_data_node; // Bypass regular processing after data nodes } } } // Parse XML attributes of the node template void parse_node_attributes(Chp &text, xml_node *node) { // For all attributes while (attribute_name_pred::test(*text)) { // Extract attribute name Chp name = text; ++text; // Skip first character of attribute name skip(text); if (text == name) FLXML_PARSE_ERROR("expected attribute name", name); // Create new attribute xml_attribute *attribute = this->allocate_attribute(view_type{name, text}); node->append_attribute(attribute); // Skip whitespace after attribute name skip(text); // Skip = if (*text != Ch('=')) FLXML_PARSE_ERROR("expected =", text); ++text; // Skip whitespace after = skip(text); // Skip quote and remember if it was ' or " Ch quote = *text; if (quote != Ch('\'') && quote != Ch('"')) FLXML_PARSE_ERROR("expected ' or \"", text); attribute->quote(quote); ++text; // Extract attribute value and expand char 
refs in it Chp value = text; Chp end; const int AttFlags = Flags & ~parse_normalize_whitespace; // No whitespace normalization in attributes if (quote == Ch('\'')) skip, AttFlags>(text); else skip, AttFlags>(text); end = text; // Set attribute value attribute->value_raw({value, end}); // Make sure that end quote is present if (*text != quote) FLXML_PARSE_ERROR("expected ' or \"", text); ++text; // Skip quote // Skip whitespace after attribute value skip(text); } } private: int m_parse_flags = 0; }; } // Also include this now. #include // Undefine internal macros #undef FLXML_PARSE_ERROR // On MSVC, restore warnings state #ifdef _MSC_VER #pragma warning(pop) #endif #endif ================================================ FILE: include/rapidxml.hpp ================================================ // // Created by dwd on 4/19/25. // #ifndef RAPIDXML_HPP #define RAPIDXML_HPP #include namespace rapidxml = flxml; #endif //RAPIDXML_HPP ================================================ FILE: include/rapidxml_print.hpp ================================================ // // Created by dwd on 4/19/25. // #ifndef RAPIDXML_PRINT_HPP #define RAPIDXML_PRINT_HPP #include namespace rapidxml = flxml; #endif //RAPIDXML_PRINT_HPP ================================================ FILE: license.txt ================================================ Use of this software is granted under one of the following two licenses, to be chosen freely by the user. 1. 
Boost Software License - Version 1.0 - August 17th, 2003 =============================================================================== Copyright (c) 2006, 2007 Marcin Kalicinski Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 2. 
The MIT License =============================================================================== Copyright (c) 2006, 2007 Marcin Kalicinski Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: manual.html ================================================

RAPIDXML Manual

Version 1.13

Copyright (C) 2006, 2009 Marcin Kalicinski
See accompanying file license.txt for license information.

Table of Contents

1. What is RapidXml?
1.1 Dependencies And Compatibility
1.2 Character Types And Encodings
1.3 Error Handling
1.4 Memory Allocation
1.5 W3C Compliance
1.6 API Design
1.7 Reliability
1.8 Acknowledgements
2. Two Minute Tutorial
2.1 Parsing
2.2 Accessing The DOM Tree
2.3 Modifying The DOM Tree
2.4 Printing XML
3. Differences From Regular XML Parsers
3.1 Lifetime Of Source Text
3.2 Ownership Of Strings
3.3 Destructive Vs Non-Destructive Mode
4. Performance
4.1 Comparison With Other Parsers
5. Reference

1. What is RapidXml?

RapidXml is an attempt to create the fastest XML DOM parser possible, while retaining usability, portability and reasonable W3C compatibility. It is an in-situ parser written in C++, with parsing speed approaching that of the strlen() function executed on the same data.

The entire parser is contained in a single header file, so no building or linking is necessary. To use it you just need to copy the rapidxml.hpp file to a convenient place (such as your project directory), and include it where needed. You may also want to use the printing functions contained in the header rapidxml_print.hpp.

1.1 Dependencies And Compatibility

RapidXml has no dependencies other than a very small subset of standard C++ library (<cassert>, <cstdlib>, <new> and <exception>, unless exceptions are disabled). It should compile on any reasonably conformant compiler, and was tested on Visual C++ 2003, Visual C++ 2005, Visual C++ 2008, gcc 3, gcc 4, and Comeau 4.3.3. Care was taken that no warnings are produced on these compilers, even with highest warning levels enabled.

1.2 Character Types And Encodings

RapidXml is character type agnostic, and can work with both narrow and wide characters. The current version does not fully support UTF-16 or UTF-32, so use of wide characters is somewhat limited. However, it should successfully parse wchar_t strings containing UTF-16 or UTF-32 if the endianness of the data matches that of the machine. UTF-8 is fully supported, including all numeric character references, which are expanded into appropriate UTF-8 byte sequences (unless you enable the parse_no_utf8 flag).

Note that RapidXml performs no decoding - strings returned by name() and value() functions will contain text encoded using the same encoding as source file. Rapidxml understands and expands the following character references: &apos; &amp; &quot; &lt; &gt; &#...; Other character references are not expanded.

1.3 Error Handling

By default, RapidXml uses C++ exceptions to report errors. If this behaviour is undesirable, RAPIDXML_NO_EXCEPTIONS can be defined to suppress exception code. See parse_error class and parse_error_handler() function for more information.

1.4 Memory Allocation

RapidXml uses a special memory pool object to allocate nodes and attributes, because direct allocation using new operator would be far too slow. Underlying memory allocations performed by the pool can be customized by use of memory_pool::set_allocator() function. See class memory_pool for more information.

1.5 W3C Compliance

RapidXml is not a W3C compliant parser, primarily because it ignores DOCTYPE declarations. There are a number of other, minor incompatibilities as well. Still, it can successfully parse and produce complete trees of all valid XML files in the W3C conformance suite (over 1000 files specially designed to find flaws in XML processors). In destructive mode it performs whitespace normalization and character entity substitution for a small set of built-in entities.

1.6 API Design

The RapidXml API is minimalistic, to reduce code size as much as possible, and facilitate use in embedded environments. Additional convenience functions are provided in separate headers: rapidxml_utils.hpp and rapidxml_print.hpp. The contents of these headers are not an essential part of the library, and are currently not documented (other than by comments in the code).

1.7 Reliability

RapidXml is very robust and comes with a large harness of unit tests. Special care has been taken to ensure stability of the parser no matter what source text is thrown at it. One of the unit tests produces 100,000 randomly corrupted variants of an XML document, which (when uncorrupted) contains all constructs recognized by RapidXml. RapidXml passes this test when it correctly recognizes that errors have been introduced, and does not crash or loop indefinitely.

Another unit test puts RapidXml head-to-head with another, well-established XML parser, and verifies that their outputs match across a wide variety of small and large documents.

Yet another test feeds RapidXml with over 1000 test files from W3C compliance suite, and verifies that correct results are obtained. There are also additional tests that verify each API function separately, and test that various parsing modes work as expected.

1.8 Acknowledgements

I would like to thank Arseny Kapoulkine for his work on pugixml, which was an inspiration for this project. Additional thanks go to Kristen Wegner for creating pugxml, from which pugixml was derived. Janusz Wohlfeil kindly ran RapidXml speed tests on hardware that I did not have access to, allowing me to expand performance comparison table.

2. Two Minute Tutorial

2.1 Parsing

The following code causes RapidXml to parse a zero-terminated string named text:
using namespace rapidxml;
xml_document<> doc;    // character type defaults to char
doc.parse<0>(text);    // 0 means default parse flags
doc object is now a root of DOM tree containing representation of the parsed XML. Because all RapidXml interface is contained inside namespace rapidxml, users must either bring contents of this namespace into scope, or fully qualify all the names. Class xml_document represents a root of the DOM hierarchy. By means of public inheritance, it is also an xml_node and a memory_pool. Template parameter of xml_document::parse() function is used to specify parsing flags, with which you can fine-tune behaviour of the parser. Note that flags must be a compile-time constant.

2.2 Accessing The DOM Tree

To access the DOM tree, use methods of xml_node and xml_attribute classes:
cout << "Name of my first node is: " << doc.first_node()->name() << "\n";
xml_node<> *node = doc.first_node("foobar");
cout << "Node foobar has value " << node->value() << "\n";
for (xml_attribute<> *attr = node->first_attribute();
     attr; attr = attr->next_attribute())
{
    cout << "Node foobar has attribute " << attr->name() << " ";
    cout << "with value " << attr->value() << "\n";
}

2.3 Modifying The DOM Tree

DOM tree produced by the parser is fully modifiable. Nodes and attributes can be added/removed, and their contents changed. The example below creates an HTML document, whose sole content is a link to the google.com website:
xml_document<> doc;
xml_node<> *node = doc.allocate_node(node_element, "a", "Google");
doc.append_node(node);
xml_attribute<> *attr = doc.allocate_attribute("href", "google.com");
node->append_attribute(attr);
One quirk is that nodes and attributes do not own the text of their names and values. This is because normally they only store pointers to the source text. So, when assigning a new name or value to the node, care must be taken to ensure proper lifetime of the string. The easiest way to achieve it is to allocate the string from the xml_document memory pool. In the above example this is not necessary, because we are only assigning character constants. But the code below uses memory_pool::allocate_string() function to allocate node name (which will have the same lifetime as the document), and assigns it to a new node:
xml_document<> doc;
char *node_name = doc.allocate_string(name);        // Allocate string and copy name into it
xml_node<> *node = doc.allocate_node(node_element, node_name);  // Set node name to node_name
Check Reference section for description of the entire interface.

2.4 Printing XML

You can print xml_document and xml_node objects into an XML string. Use print() function or operator <<, which are defined in rapidxml_print.hpp header.
using namespace rapidxml;
xml_document<> doc;    // character type defaults to char
// ... some code to fill the document

// Print to stream using operator <<
std::cout << doc;   

// Print to stream using print function, specifying printing flags
print(std::cout, doc, 0);   // 0 means default printing flags

// Print to string using output iterator
std::string s;
print(std::back_inserter(s), doc, 0);

// Print to memory buffer using output iterator
char buffer[4096];                      // You are responsible for making the buffer large enough!
char *end = print(buffer, doc, 0);      // end contains pointer to character after last printed character
*end = 0;                               // Add string terminator after XML

3. Differences From Regular XML Parsers

RapidXml is an in-situ parser, which allows it to achieve very high parsing speed. In-situ means that parser does not make copies of strings. Instead, it places pointers to the source text in the DOM hierarchy.

3.1 Lifetime Of Source Text

In-situ parsing requires that source text lives at least as long as the document object. If source text is destroyed, names and values of nodes in DOM tree will become invalid as well. Additionally, whitespace processing, character entity translation, and zero-termination of strings require that source text be modified during parsing (but see non-destructive mode). This makes the text useless for further processing once it has been parsed by RapidXml.

In many cases however, these are not serious issues.

3.2 Ownership Of Strings

Nodes and attributes produced by RapidXml do not own their name and value strings. They merely hold the pointers to them. This means you have to be careful when setting these values manually, by using xml_base::name(const Ch *) or xml_base::value(const Ch *) functions. Care must be taken to ensure that lifetime of the string passed is at least as long as lifetime of the node/attribute. The easiest way to achieve it is to allocate the string from memory_pool owned by the document. Use memory_pool::allocate_string() function for this purpose.

3.3 Destructive Vs Non-Destructive Mode

By default, the parser modifies source text during the parsing process. This is required to achieve character entity translation, whitespace normalization, and zero-termination of strings.

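In some cases this behaviour may be undesirable, for example if source text resides in read only memory, or is mapped to memory directly from file. By using appropriate parser flags (parse_non_destructive), source text modifications can be disabled. However, because RapidXml does in-situ parsing, it obviously has the following side-effects:
  • no whitespace normalization is done
  • no character entity translation is done
  • names and values are not zero-terminated; you must use xml_base::name_size() and xml_base::value_size() functions to tell where they end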

4. Performance

RapidXml achieves its speed through use of several techniques:
  • In-situ parsing. When building DOM tree, RapidXml does not make copies of string data, such as node names and values. Instead, it stores pointers to interior of the source text.
  • Use of template metaprogramming techniques. This allows it to move much of the work to compile time. Through magic of the templates, C++ compiler generates a separate copy of parsing code for any combination of parser flags you use. In each copy, all possible decisions are made at compile time and all unused code is omitted.
  • Extensive use of lookup tables for parsing.
  • Hand-tuned C++ with profiling done on several most popular CPUs.
This results in a very small and fast code: a parser which is custom tailored to exact needs with each invocation.

4.1 Comparison With Other Parsers

The table below compares speed of RapidXml to some other parsers, and to strlen() function executed on the same data. On a modern CPU (as of 2007), you can expect parsing throughput to be close to 1 GB/s. As a rule of thumb, parsing speed is about 50-100x faster than Xerces DOM, 30-60x faster than TinyXml, 3-12x faster than pugxml, and about 5% - 30% faster than pugixml, the fastest XML parser I know of.
  • The test file is a real-world, 50kB large, moderately dense XML file.
  • All timing is done by using RDTSC instruction present in Pentium-compatible CPUs.
  • No profile-guided optimizations are used.
  • All parsers are running in their fastest modes.
  • The results are given in CPU cycles per character, so frequency of CPUs is irrelevant.
  • The results are minimum values from a large number of runs, to minimize effects of operating system activity, task switching, interrupt handling etc.
  • A single parse of the test file takes about 1/10th of a millisecond, so with large number of runs there is a good chance of hitting at least one no-interrupt streak, and obtaining undisturbed results.
Platform  | Compiler  | strlen() | RapidXml | pugixml 0.3 | pugxml | TinyXml
Pentium 4 | MSVC 8.0  | 2.5      | 5.4      | 7.0         | 61.7   | 298.8
Pentium 4 | gcc 4.1.1 | 0.8      | 6.1      | 9.5         | 67.0   | 413.2
Core 2    | MSVC 8.0  | 1.0      | 4.5      | 5.0         | 24.6   | 154.8
Core 2    | gcc 4.1.1 | 0.6      | 4.6      | 5.4         | 28.3   | 229.3
Athlon XP | MSVC 8.0  | 3.1      | 7.7      | 8.0         | 25.5   | 182.6
Athlon XP | gcc 4.1.1 | 0.9      | 8.2      | 9.2         | 33.7   | 265.2
Pentium 3 | MSVC 8.0  | 2.0      | 6.3      | 7.0         | 30.9   | 211.9
Pentium 3 | gcc 4.1.1 | 1.0      | 6.7      | 8.9         | 35.3   | 316.0
(*) All results are in CPU cycles per character of source text

5. Reference

This section lists all classes, functions, constants etc. and describes them in detail.
class template rapidxml::memory_pool
constructor memory_pool()
destructor ~memory_pool()
function allocate_node(node_type type, const Ch *name=0, const Ch *value=0, std::size_t name_size=0, std::size_t value_size=0)
function allocate_attribute(const Ch *name=0, const Ch *value=0, std::size_t name_size=0, std::size_t value_size=0)
function allocate_string(const Ch *source=0, std::size_t size=0)
function clone_node(const xml_node< Ch > *source, xml_node< Ch > *result=0)
function clear()
function set_allocator(alloc_func *af, free_func *ff)

class rapidxml::parse_error
constructor parse_error(const char *what, void *where)
function what() const
function where() const

class template rapidxml::xml_attribute
constructor xml_attribute()
function document() const
function previous_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function next_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const

class template rapidxml::xml_base
constructor xml_base()
function name() const
function name_size() const
function value() const
function value_size() const
function name(const Ch *name, std::size_t size)
function name(const Ch *name)
function value(const Ch *value, std::size_t size)
function value(const Ch *value)
function parent() const

class template rapidxml::xml_document
constructor xml_document()
function parse(Ch *text)
function clear()

class template rapidxml::xml_node
constructor xml_node(node_type type)
function type() const
function document() const
function first_node(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function last_node(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function previous_sibling(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function next_sibling(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function first_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function last_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const
function type(node_type type)
function prepend_node(xml_node< Ch > *child)
function append_node(xml_node< Ch > *child)
function insert_node(xml_node< Ch > *where, xml_node< Ch > *child)
function remove_first_node()
function remove_last_node()
function remove_node(xml_node< Ch > *where)
function remove_all_nodes()
function prepend_attribute(xml_attribute< Ch > *attribute)
function append_attribute(xml_attribute< Ch > *attribute)
function insert_attribute(xml_attribute< Ch > *where, xml_attribute< Ch > *attribute)
function remove_first_attribute()
function remove_last_attribute()
function remove_attribute(xml_attribute< Ch > *where)
function remove_all_attributes()

namespace rapidxml
enum node_type
function parse_error_handler(const char *what, void *where)
function print(OutIt out, const xml_node< Ch > &node, int flags=0)
function print(std::basic_ostream< Ch > &out, const xml_node< Ch > &node, int flags=0)
function operator<<(std::basic_ostream< Ch > &out, const xml_node< Ch > &node)
constant parse_no_data_nodes
constant parse_no_element_values
constant parse_no_string_terminators
constant parse_no_entity_translation
constant parse_no_utf8
constant parse_declaration_node
constant parse_comment_nodes
constant parse_doctype_node
constant parse_pi_nodes
constant parse_validate_closing_tags
constant parse_trim_whitespace
constant parse_normalize_whitespace
constant parse_default
constant parse_non_destructive
constant parse_fastest
constant parse_full
constant print_no_indenting


class template rapidxml::memory_pool

Defined in rapidxml.hpp
Base class for xml_document

Description

This class is used by the parser to create new nodes and attributes, without overheads of dynamic memory allocation. In most cases, you will not need to use this class directly. However, if you need to create nodes manually or modify names/values of nodes, you are encouraged to use memory_pool of relevant xml_document to allocate the memory. Not only is this faster than allocating them by using new operator, but also their lifetime will be tied to the lifetime of document, possibly simplifying memory management.

Call allocate_node() or allocate_attribute() functions to obtain new nodes or attributes from the pool. You can also call allocate_string() function to allocate strings. Such strings can then be used as names or values of nodes without worrying about their lifetime. Note that there is no free() function -- all allocations are freed at once when clear() function is called, or when the pool is destroyed.

It is also possible to create a standalone memory_pool, and use it to allocate nodes, whose lifetime will not be tied to any document.

Pool maintains RAPIDXML_STATIC_POOL_SIZE bytes of statically allocated memory. Until static memory is exhausted, no dynamic memory allocations are done. When static memory is exhausted, pool allocates additional blocks of memory of size RAPIDXML_DYNAMIC_POOL_SIZE each, by using global new[] and delete[] operators. This behaviour can be changed by setting custom allocation routines. Use set_allocator() function to set them.

Allocations for nodes, attributes and strings are aligned at RAPIDXML_ALIGNMENT bytes. This value defaults to the size of pointer on target architecture.

To obtain absolutely top performance from the parser, it is important that all nodes are allocated from a single, contiguous block of memory. Otherwise, cache misses when jumping between two (or more) disjoint blocks of memory can slow down parsing quite considerably. If required, you can tweak RAPIDXML_STATIC_POOL_SIZE, RAPIDXML_DYNAMIC_POOL_SIZE and RAPIDXML_ALIGNMENT to obtain best wasted memory to performance compromise. To do it, define their values before rapidxml.hpp file is included.

Parameters

Ch
Character type of created nodes.

constructor memory_pool::memory_pool

Synopsis

memory_pool();

Description

Constructs empty pool with default allocator functions.

destructor memory_pool::~memory_pool

Synopsis

~memory_pool();

Description

Destroys pool and frees all the memory. This causes memory occupied by nodes allocated by the pool to be freed. Nodes allocated from the pool are no longer valid.

function memory_pool::allocate_node

Synopsis

xml_node<Ch>* allocate_node(node_type type, const Ch *name=0, const Ch *value=0, std::size_t name_size=0, std::size_t value_size=0);

Description

Allocates a new node from the pool, and optionally assigns name and value to it. If the allocation request cannot be accommodated, this function will throw std::bad_alloc. If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function will call rapidxml::parse_error_handler() function.

Parameters

type
Type of node to create.
name
Name to assign to the node, or 0 to assign no name.
value
Value to assign to the node, or 0 to assign no value.
name_size
Size of name to assign, or 0 to automatically calculate size from name string.
value_size
Size of value to assign, or 0 to automatically calculate size from value string.

Returns

Pointer to allocated node. This pointer will never be NULL.

function memory_pool::allocate_attribute

Synopsis

xml_attribute<Ch>* allocate_attribute(const Ch *name=0, const Ch *value=0, std::size_t name_size=0, std::size_t value_size=0);

Description

Allocates a new attribute from the pool, and optionally assigns name and value to it. If the allocation request cannot be accommodated, this function will throw std::bad_alloc. If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function will call rapidxml::parse_error_handler() function.

Parameters

name
Name to assign to the attribute, or 0 to assign no name.
value
Value to assign to the attribute, or 0 to assign no value.
name_size
Size of name to assign, or 0 to automatically calculate size from name string.
value_size
Size of value to assign, or 0 to automatically calculate size from value string.

Returns

Pointer to allocated attribute. This pointer will never be NULL.

function memory_pool::allocate_string

Synopsis

Ch* allocate_string(const Ch *source=0, std::size_t size=0);

Description

Allocates a char array of given size from the pool, and optionally copies a given string to it. If the allocation request cannot be accommodated, this function will throw std::bad_alloc. If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function will call rapidxml::parse_error_handler() function.

Parameters

source
String to initialize the allocated memory with, or 0 to not initialize it.
size
Number of characters to allocate, or zero to calculate it automatically from source string length; if size is 0, source string must be specified and null terminated.

Returns

Pointer to allocated char array. This pointer will never be NULL.

function memory_pool::clone_node

Synopsis

xml_node<Ch>* clone_node(const xml_node< Ch > *source, xml_node< Ch > *result=0);

Description

Clones an xml_node and its hierarchy of child nodes and attributes. Nodes and attributes are allocated from this memory pool. Names and values are not cloned, they are shared between the clone and the source. Result node can be optionally specified as a second parameter, in which case its contents will be replaced with cloned source node. This is useful when you want to clone entire document.

Parameters

source
Node to clone.
result
Node to put results in, or 0 to automatically allocate result node

Returns

Pointer to cloned node. This pointer will never be NULL.

function memory_pool::clear

Synopsis

void clear();

Description

Clears the pool. This causes memory occupied by nodes allocated by the pool to be freed. Any nodes or strings allocated from the pool will no longer be valid.

function memory_pool::set_allocator

Synopsis

void set_allocator(alloc_func *af, free_func *ff);

Description

Sets or resets the user-defined memory allocation functions for the pool. This can only be called when no memory is allocated from the pool yet, otherwise results are undefined. Allocation function must not return invalid pointer on failure. It should either throw, stop the program, or use longjmp() function to pass control to other place of program. If it returns invalid pointer, results are undefined.

User defined allocation functions must have the following forms:

void *allocate(std::size_t size);
void free(void *pointer);

Parameters

af
Allocation function, or 0 to restore default function
ff
Free function, or 0 to restore default function

class rapidxml::parse_error

Defined in rapidxml.hpp

Description

Parse error exception. This exception is thrown by the parser when an error occurs. Use what() function to get human-readable error message. Use where() function to get a pointer to position within source text where error was detected.

If throwing exceptions by the parser is undesirable, it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro before rapidxml.hpp is included. This will cause the parser to call rapidxml::parse_error_handler() function instead of throwing an exception. This function must be defined by the user.

This class derives from std::exception class.

constructor parse_error::parse_error

Synopsis

parse_error(const char *what, void *where);

Description

Constructs parse error.

function parse_error::what

Synopsis

virtual const char* what() const;

Description

Gets human readable description of error.

Returns

Pointer to null terminated description of the error.

function parse_error::where

Synopsis

Ch* where() const;

Description

Gets pointer to character data where error happened. Ch should be the same as char type of xml_document that produced the error.

Returns

Pointer to location within the parsed string where error occurred.

class template rapidxml::xml_attribute

Defined in rapidxml.hpp
Inherits from xml_base

Description

Class representing attribute node of XML document. Each attribute has name and value strings, which are available through name() and value() functions (inherited from xml_base). Note that after parse, both name and value of attribute will point to interior of source text used for parsing. Thus, this text must persist in memory for the lifetime of attribute.

Parameters

Ch
Character type to use.

constructor xml_attribute::xml_attribute

Synopsis

xml_attribute();

Description

Constructs an empty attribute. Consider using memory_pool of appropriate xml_document if allocating attributes manually.

function xml_attribute::document

Synopsis

xml_document<Ch>* document() const;

Description

Gets document of which attribute is a child.

Returns

Pointer to document that contains this attribute, or 0 if there is no parent document.

function xml_attribute::previous_attribute

Synopsis

xml_attribute<Ch>* previous_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets previous attribute, optionally matching attribute name.

Parameters

name
Name of attribute to find, or 0 to return previous attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found attribute, or 0 if not found.

function xml_attribute::next_attribute

Synopsis

xml_attribute<Ch>* next_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets next attribute, optionally matching attribute name.

Parameters

name
Name of attribute to find, or 0 to return next attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found attribute, or 0 if not found.

class template rapidxml::xml_base

Defined in rapidxml.hpp
Base class for xml_attribute, xml_node

Description

Base class for xml_node and xml_attribute implementing common functions: name(), name_size(), value(), value_size() and parent().

Parameters

Ch
Character type to use

constructor xml_base::xml_base

Synopsis

xml_base();

function xml_base::name

Synopsis

Ch* name() const;

Description

Gets name of the node. Interpretation of name depends on type of node. Note that name will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse.

Use name_size() function to determine length of the name.

Returns

Name of node, or empty string if node has no name.

function xml_base::name_size

Synopsis

std::size_t name_size() const;

Description

Gets size of node name, not including terminator character. This function works correctly irrespective of whether name is or is not zero terminated.

Returns

Size of node name, in characters.

function xml_base::value

Synopsis

Ch* value() const;

Description

Gets value of node. Interpretation of value depends on type of node. Note that value will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse.

Use value_size() function to determine length of the value.

Returns

Value of node, or empty string if node has no value.

function xml_base::value_size

Synopsis

std::size_t value_size() const;

Description

Gets size of node value, not including terminator character. This function works correctly irrespective of whether value is or is not zero terminated.

Returns

Size of node value, in characters.

function xml_base::name

Synopsis

void name(const Ch *name, std::size_t size);

Description

Sets name of node to a non zero-terminated string. See Ownership Of Strings.

Note that node does not own its name or value, it only stores a pointer to it. It will not delete or otherwise free the pointer on destruction. It is the responsibility of the user to properly manage lifetime of the string. The easiest way to achieve it is to use memory_pool of the document to allocate the string - on destruction of the document the string will be automatically freed.

Size of name must be specified separately, because name does not have to be zero terminated. Use name(const Ch *) function to have the length automatically calculated (string must be zero terminated).

Parameters

name
Name of node to set. Does not have to be zero terminated.
size
Size of name, in characters. This does not include zero terminator, if one is present.

function xml_base::name

Synopsis

void name(const Ch *name);

Description

Sets name of node to a zero-terminated string. See also Ownership Of Strings and xml_node::name(const Ch *, std::size_t).

Parameters

name
Name of node to set. Must be zero terminated.

function xml_base::value

Synopsis

void value(const Ch *value, std::size_t size);

Description

Sets value of node to a non zero-terminated string. See Ownership Of Strings.

Note that node does not own its name or value, it only stores a pointer to it. It will not delete or otherwise free the pointer on destruction. It is the responsibility of the user to properly manage lifetime of the string. The easiest way to achieve it is to use memory_pool of the document to allocate the string - on destruction of the document the string will be automatically freed.

Size of value must be specified separately, because it does not have to be zero terminated. Use value(const Ch *) function to have the length automatically calculated (string must be zero terminated).

If an element has a child node of type node_data, it will take precedence over element value when printing. If you want to manipulate data of elements using values, use parser flag rapidxml::parse_no_data_nodes to prevent creation of data nodes by the parser.

Parameters

value
Value of node to set. Does not have to be zero terminated.
size
Size of value, in characters. This does not include zero terminator, if one is present.

function xml_base::value

Synopsis

void value(const Ch *value);

Description

Sets value of node to a zero-terminated string. See also Ownership Of Strings and xml_node::value(const Ch *, std::size_t).

Parameters

value
Value of node to set. Must be zero terminated.

function xml_base::parent

Synopsis

xml_node<Ch>* parent() const;

Description

Gets node parent.

Returns

Pointer to parent node, or 0 if there is no parent.

class template rapidxml::xml_document

Defined in rapidxml.hpp
Inherits from xml_node, memory_pool

Description

This class represents root of the DOM hierarchy. It is also an xml_node and a memory_pool through public inheritance. Use parse() function to build a DOM tree from a zero-terminated XML text string. parse() function allocates memory for nodes and attributes by using functions of xml_document, which are inherited from memory_pool. To access root node of the document, use the document itself, as if it was an xml_node.

Parameters

Ch
Character type to use.

constructor xml_document::xml_document

Synopsis

xml_document();

Description

Constructs empty XML document.

function xml_document::parse

Synopsis

void parse(Ch *text);

Description

Parses zero-terminated XML string according to given flags. Passed string will be modified by the parser, unless rapidxml::parse_non_destructive flag is used. The string must persist for the lifetime of the document. In case of error, rapidxml::parse_error exception will be thrown.

If you want to parse contents of a file, you must first load the file into the memory, and pass pointer to its beginning. Make sure that data is zero-terminated.

Document can be parsed into multiple times. Each new call to parse removes previous nodes and attributes (if any), but does not clear memory pool.

Parameters

text
XML data to parse; pointer is non-const to denote fact that this data may be modified by the parser.

function xml_document::clear

Synopsis

void clear();

Description

Clears the document by deleting all nodes and clearing the memory pool. All nodes owned by document pool are destroyed.

class template rapidxml::xml_node

Defined in rapidxml.hpp
Inherits from xml_base
Base class for xml_document

Description

Class representing a node of XML document. Each node may have associated name and value strings, which are available through name() and value() functions. Interpretation of name and value depends on type of the node. Type of node can be determined by using type() function.

Note that after parse, both name and value of node, if any, will point to interior of source text used for parsing. Thus, this text must persist in the memory for the lifetime of node.

Parameters

Ch
Character type to use.

constructor xml_node::xml_node

Synopsis

xml_node(node_type type);

Description

Constructs an empty node with the specified type. Consider using memory_pool of appropriate document to allocate nodes manually.

Parameters

type
Type of node to construct.

function xml_node::type

Synopsis

node_type type() const;

Description

Gets type of node.

Returns

Type of node.

function xml_node::document

Synopsis

xml_document<Ch>* document() const;

Description

Gets document of which node is a child.

Returns

Pointer to document that contains this node, or 0 if there is no parent document.

function xml_node::first_node

Synopsis

xml_node<Ch>* first_node(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets first child node, optionally matching node name.

Parameters

name
Name of child to find, or 0 to return first child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found child, or 0 if not found.

function xml_node::last_node

Synopsis

xml_node<Ch>* last_node(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets last child node, optionally matching node name. Behaviour is undefined if node has no children. Use first_node() to test if node has children.

Parameters

name
Name of child to find, or 0 to return last child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found child, or 0 if not found.

function xml_node::previous_sibling

Synopsis

xml_node<Ch>* previous_sibling(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets previous sibling node, optionally matching node name. Behaviour is undefined if node has no parent. Use parent() to test if node has a parent.

Parameters

name
Name of sibling to find, or 0 to return previous sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found sibling, or 0 if not found.

function xml_node::next_sibling

Synopsis

xml_node<Ch>* next_sibling(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets next sibling node, optionally matching node name. Behaviour is undefined if node has no parent. Use parent() to test if node has a parent.

Parameters

name
Name of sibling to find, or 0 to return next sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found sibling, or 0 if not found.

function xml_node::first_attribute

Synopsis

xml_attribute<Ch>* first_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets first attribute of node, optionally matching attribute name.

Parameters

name
Name of attribute to find, or 0 to return first attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found attribute, or 0 if not found.

function xml_node::last_attribute

Synopsis

xml_attribute<Ch>* last_attribute(const Ch *name=0, std::size_t name_size=0, bool case_sensitive=true) const;

Description

Gets last attribute of node, optionally matching attribute name.

Parameters

name
Name of attribute to find, or 0 to return last attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero
name_size
Size of name, in characters, or 0 to have size calculated automatically from string
case_sensitive
Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters

Returns

Pointer to found attribute, or 0 if not found.

function xml_node::type

Synopsis

void type(node_type type);

Description

Sets type of node.

Parameters

type
Type of node to set.

function xml_node::prepend_node

Synopsis

void prepend_node(xml_node< Ch > *child);

Description

Prepends a new child node. The prepended child becomes the first child, and all existing children are moved one position back.

Parameters

child
Node to prepend.

function xml_node::append_node

Synopsis

void append_node(xml_node< Ch > *child);

Description

Appends a new child node. The appended child becomes the last child.

Parameters

child
Node to append.

function xml_node::insert_node

Synopsis

void insert_node(xml_node< Ch > *where, xml_node< Ch > *child);

Description

Inserts a new child node at specified place inside the node. All children after and including the specified node are moved one position back.

Parameters

where
Place where to insert the child, or 0 to insert at the back.
child
Node to insert.

function xml_node::remove_first_node

Synopsis

void remove_first_node();

Description

Removes first child node. If node has no children, behaviour is undefined. Use first_node() to test if node has children.

function xml_node::remove_last_node

Synopsis

void remove_last_node();

Description

Removes last child of the node. If node has no children, behaviour is undefined. Use first_node() to test if node has children.

function xml_node::remove_node

Synopsis

void remove_node(xml_node< Ch > *where);

Description

Removes specified child from the node.

function xml_node::remove_all_nodes

Synopsis

void remove_all_nodes();

Description

Removes all child nodes (but not attributes).

function xml_node::prepend_attribute

Synopsis

void prepend_attribute(xml_attribute< Ch > *attribute);

Description

Prepends a new attribute to the node.

Parameters

attribute
Attribute to prepend.

function xml_node::append_attribute

Synopsis

void append_attribute(xml_attribute< Ch > *attribute);

Description

Appends a new attribute to the node.

Parameters

attribute
Attribute to append.

function xml_node::insert_attribute

Synopsis

void insert_attribute(xml_attribute< Ch > *where, xml_attribute< Ch > *attribute);

Description

Inserts a new attribute at specified place inside the node. All attributes after and including the specified attribute are moved one position back.

Parameters

where
Place where to insert the attribute, or 0 to insert at the back.
attribute
Attribute to insert.

function xml_node::remove_first_attribute

Synopsis

void remove_first_attribute();

Description

Removes first attribute of the node. If node has no attributes, behaviour is undefined. Use first_attribute() to test if node has attributes.

function xml_node::remove_last_attribute

Synopsis

void remove_last_attribute();

Description

Removes last attribute of the node. If node has no attributes, behaviour is undefined. Use first_attribute() to test if node has attributes.

function xml_node::remove_attribute

Synopsis

void remove_attribute(xml_attribute< Ch > *where);

Description

Removes specified attribute from node.

Parameters

where
Pointer to attribute to be removed.

function xml_node::remove_all_attributes

Synopsis

void remove_all_attributes();

Description

Removes all attributes of node.

enum node_type

Description

Enumeration listing all node types produced by the parser. Use xml_node::type() function to query node type.

Values

node_document
A document node. Name and value are empty.
node_element
An element node. Name contains element name. Value contains text of first data node.
node_data
A data node. Name is empty. Value contains data text.
node_cdata
A CDATA node. Name is empty. Value contains data text.
node_comment
A comment node. Name is empty. Value contains comment text.
node_declaration
A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes.
node_doctype
A DOCTYPE node. Name is empty. Value contains DOCTYPE text.
node_pi
A PI node. Name contains target. Value contains instructions.

function parse_error_handler

Synopsis

void rapidxml::parse_error_handler(const char *what, void *where);

Description

When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function is called to notify user about the error. It must be defined by the user.

This function cannot return. If it does, the results are undefined.

A very simple definition might look like this: void rapidxml::parse_error_handler(const char *what, void *where) { std::cout << "Parse error: " << what << "\n"; std::abort(); }

Parameters

what
Human readable description of the error.
where
Pointer to character data where error was detected.

function print

Synopsis

OutIt rapidxml::print(OutIt out, const xml_node< Ch > &node, int flags=0);

Description

Prints XML to given output iterator.

Parameters

out
Output iterator to print to.
node
Node to be printed. Pass xml_document to print entire document.
flags
Flags controlling how XML is printed.

Returns

Output iterator pointing to position immediately after last character of printed text.

function print

Synopsis

std::basic_ostream<Ch>& rapidxml::print(std::basic_ostream< Ch > &out, const xml_node< Ch > &node, int flags=0);

Description

Prints XML to given output stream.

Parameters

out
Output stream to print to.
node
Node to be printed. Pass xml_document to print entire document.
flags
Flags controlling how XML is printed.

Returns

Output stream.

function operator<<

Synopsis

std::basic_ostream<Ch>& rapidxml::operator<<(std::basic_ostream< Ch > &out, const xml_node< Ch > &node);

Description

Prints formatted XML to given output stream. Uses default printing flags. Use print() function to customize printing process.

Parameters

out
Output stream to print to.
node
Node to be printed.

Returns

Output stream.

constant parse_no_data_nodes

Synopsis

const int parse_no_data_nodes = 0x1;

Description

Parse flag instructing the parser to not create data nodes. Text of first data node will still be placed in value of parent element, unless rapidxml::parse_no_element_values flag is also specified. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_no_element_values

Synopsis

const int parse_no_element_values = 0x2;

Description

Parse flag instructing the parser to not use text of first data node as a value of parent element. Can be combined with other flags by use of | operator. Note that child data nodes of element node take precedence over its value when printing. That is, if element has one or more child data nodes and a value, the value will be ignored. Use rapidxml::parse_no_data_nodes flag to prevent creation of data nodes if you want to manipulate data using values of elements.

See xml_document::parse() function.

constant parse_no_string_terminators

Synopsis

const int parse_no_string_terminators = 0x4;

Description

Parse flag instructing the parser to not place zero terminators after strings in the source text. By default zero terminators are placed, modifying source text. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_no_entity_translation

Synopsis

const int parse_no_entity_translation = 0x8;

Description

Parse flag instructing the parser to not translate entities in the source text. By default entities are translated, modifying source text. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_no_utf8

Synopsis

const int parse_no_utf8 = 0x10;

Description

Parse flag instructing the parser to disable UTF-8 handling and assume plain 8 bit characters. By default, UTF-8 handling is enabled. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_declaration_node

Synopsis

const int parse_declaration_node = 0x20;

Description

Parse flag instructing the parser to create XML declaration node. By default, declaration node is not created. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_comment_nodes

Synopsis

const int parse_comment_nodes = 0x40;

Description

Parse flag instructing the parser to create comment nodes. By default, comment nodes are not created. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_doctype_node

Synopsis

const int parse_doctype_node = 0x80;

Description

Parse flag instructing the parser to create DOCTYPE node. By default, doctype node is not created. Although W3C specification allows at most one DOCTYPE node, RapidXml will silently accept documents with more than one. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_pi_nodes

Synopsis

const int parse_pi_nodes = 0x100;

Description

Parse flag instructing the parser to create PI nodes. By default, PI nodes are not created. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_validate_closing_tags

Synopsis

const int parse_validate_closing_tags = 0x200;

Description

Parse flag instructing the parser to validate closing tag names. If not set, name inside closing tag is irrelevant to the parser. By default, closing tags are not validated. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_trim_whitespace

Synopsis

const int parse_trim_whitespace = 0x400;

Description

Parse flag instructing the parser to trim all leading and trailing whitespace of data nodes. By default, whitespace is not trimmed. This flag does not cause the parser to modify source text. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_normalize_whitespace

Synopsis

const int parse_normalize_whitespace = 0x800;

Description

Parse flag instructing the parser to condense all whitespace runs of data nodes to a single space character. Trimming of leading and trailing whitespace of data is controlled by rapidxml::parse_trim_whitespace flag. By default, whitespace is not normalized. If this flag is specified, source text will be modified. Can be combined with other flags by use of | operator.

See xml_document::parse() function.

constant parse_default

Synopsis

const int parse_default = 0;

Description

Parse flags which represent default behaviour of the parser. This is always equal to 0, so that all other flags can be simply OR-ed together. Normally there is no need to inconveniently disable flags by AND-ing with their negated (~) values. This also means that the meaning of each flag is a negation of the default setting. For example, if flag name is rapidxml::parse_no_utf8, it means that UTF-8 is enabled by default, and using the flag will disable it.

See xml_document::parse() function.

constant parse_non_destructive

Synopsis

const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation;

Description

A combination of parse flags that forbids any modifications of the source text. This also results in faster parsing. However, note that the following will occur:
  • names and values of nodes will not be zero terminated; you have to use xml_base::name_size() and xml_base::value_size() functions to determine where name and value end
  • entities will not be translated
  • whitespace will not be normalized
See xml_document::parse() function.

constant parse_fastest

Synopsis

const int parse_fastest = parse_non_destructive | parse_no_data_nodes;

Description

A combination of parse flags resulting in fastest possible parsing, without sacrificing important data.

See xml_document::parse() function.

constant parse_full

Synopsis

const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags;

Description

A combination of parse flags resulting in largest amount of data being extracted. This usually results in slowest parsing.

See xml_document::parse() function.

constant print_no_indenting

Synopsis

const int print_no_indenting = 0x1;

Description

Printer flag instructing the printer to suppress indenting of XML. See print() function.

================================================ FILE: test/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.24) project(rapidxml) # Include the Conan toolchain include(${CMAKE_CURRENT_SOURCE_DIR}/conan_toolchain.cmake) # GoogleTest requires at least C++14 set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) option(RAPIDXML_PERF_TESTS "Enable (very slow) performance tests" OFF) option(RAPIDXML_SENTRY "Use Sentry (for tests only)" ON) find_package(GTest) find_package(flxml CONFIG REQUIRED) if (RAPIDXML_SENTRY) set(SENTRY_BACKEND inproc) find_package(sentry) endif(RAPIDXML_SENTRY) enable_testing() add_executable(rapidxml-test src/parse-simple.cpp src/manipulations.cpp src/round-trips.cpp src/low-level-parse.cpp src/perf.cpp src/iterators.cpp src/xpath.cpp src/main.cc ) target_link_libraries(rapidxml-test PRIVATE GTest::gtest flxml::flxml ) if(RAPIDXML_SENTRY) target_link_libraries(rapidxml-test PRIVATE sentry-native::sentry-native) target_compile_definitions(rapidxml-test PRIVATE DWD_GTEST_SENTRY=1) endif() target_include_directories(rapidxml-test PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ) if (RAPIDXML_PERF_TESTS) message("Running performance tests") file(DOWNLOAD https://www.w3.org/TR/xml/REC-xml-20081126.xml ${CMAKE_CURRENT_BINARY_DIR}/REC-xml-20081126.xml) target_compile_definitions(rapidxml-test PRIVATE RAPIDXML_TESTING=1 RAPIDXML_PERF_TESTS=1) else() message("Will skip performance tests") target_compile_definitions(rapidxml-test PRIVATE RAPIDXML_TESTING=1) endif() include(GoogleTest) gtest_discover_tests(rapidxml-test) ================================================ FILE: test/conanfile.py ================================================ from conan import ConanFile from conan.tools.cmake import CMakeToolchain, CMake, CMakeDeps class FLXML(ConanFile): name = "flxml-test" settings = "os", "compiler", "build_type", "arch" test_type = "explicit" def configure(self): self.options["sentry-native"].backend = "inproc" 
def requirements(self): self.requires(f'flxml/{self.version}') self.requires("sentry-native/0.7.11") self.requires("gtest/1.12.1") def generate(self): deps = CMakeDeps(self) deps.generate() tc = CMakeToolchain(self) tc.user_presets_path = False tc.generate() def build(self): cmake = CMake(self) cmake.configure() cmake.build() def test(self): if not self.conf.get("tools.build:skip_test"): self.run("./rapidxml-test", env="conanrun") ================================================ FILE: test/sonar-project.properties ================================================ sonar.projectKey=dwd-github_rapidxml sonar.organization=dwd-github # This is the name and version displayed in the SonarCloud UI. #sonar.projectName=RapidXML #sonar.projectVersion=1.0 # Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows. #sonar.sources=. # Encoding of the source code. Default is default system encoding #sonar.sourceEncoding=UTF-8 sonar.exclusions=**/deps/**, **/_deps/**, **/sentry-native/**, **/*.html, **/googletest/** sonar.sources=. sonar.coverage.exclusions=**/deps/**,**/_deps/**,**/sentry-native/**,**/*.html,**/googletest/**,**/test/** ================================================ FILE: test/src/iterators.cpp ================================================ // // Created by dave on 10/07/2024. 
// #include #include #include #include #include TEST(Iterators, Nodes) { std::string xml = ""; flxml::xml_document<> doc; doc.parse(xml); int i = 0; for (auto & child : doc.first_node()->children()) { ++i; switch(i) { case 1: EXPECT_EQ(child.name(), "one"); break; case 2: EXPECT_EQ(child.name(), "two"); break; case 3: EXPECT_EQ(child.name(), "three"); break; } } EXPECT_EQ(i, 3); } TEST(Iterators, Attributes) { std::string xml = R"()"; flxml::xml_document<> doc; doc.parse(xml); int i = 0; for (auto & child : doc.first_node()->attributes()) { ++i; switch(i) { case 1: EXPECT_EQ(child.name(), "one"); break; case 2: EXPECT_EQ(child.name(), "two"); break; case 3: EXPECT_EQ(child.name(), "three"); break; } } EXPECT_EQ(i, 3); } TEST(Predicates, Nodes) { std::string xml = ""; flxml::xml_document<> doc; doc.parse(xml); auto r = doc.first_node()->children(); for (auto const & child : r | std::ranges::views::filter([](auto const & n) { return n.name() == "two"; })) { EXPECT_EQ(child.name(), "two"); } auto c = std::ranges::count_if(r, [](auto const & n) { return n.name() == "two"; }); EXPECT_EQ(c, 1); auto match = std::ranges::find_if(r, [](auto const & n) { return n.name() == "two"; }); EXPECT_EQ(match->name(), "two"); } TEST(Predicates, AllNodes) { std::string xml = ""; flxml::xml_document<> doc; doc.parse(xml); auto it = flxml::descendant_iterator<>(doc.first_node()); EXPECT_EQ(it->name(), "one"); ++it; EXPECT_EQ(it->name(), "two"); ++it; EXPECT_EQ(it->name(), "three"); ++it; EXPECT_EQ(it->name(), "four"); ++it; EXPECT_EQ(it->name(), "five"); ++it; EXPECT_EQ(it->name(), "six"); ++it; EXPECT_FALSE(it.valid()); } TEST(Predicates, AllNodesRev) { std::string xml = ""; flxml::xml_document<> doc; doc.parse(xml); auto it = flxml::descendant_iterator<>(doc.first_node()); EXPECT_EQ(it->name(), "one"); ++it; EXPECT_EQ(it->name(), "two"); ++it; EXPECT_EQ(it->name(), "three"); ++it; EXPECT_EQ(it->name(), "four"); ++it; EXPECT_EQ(it->name(), "five"); ++it; EXPECT_EQ(it->name(), "six"); 
--it; EXPECT_EQ(it->name(), "five"); --it; EXPECT_EQ(it->name(), "four"); --it; EXPECT_EQ(it->name(), "three"); --it; EXPECT_EQ(it->name(), "two"); --it; EXPECT_EQ(it->name(), "one"); } TEST(Predicates, Attributes) { std::string xml = R"()"; flxml::xml_document<> doc; doc.parse(xml); auto r = doc.first_node()->attributes(); for (auto const & child : r | std::ranges::views::filter([](auto const & n) { return n.name() == "two"; })) { EXPECT_EQ(child.name(), "two"); } auto c = std::ranges::count_if(r, [](auto const & n) { return n.name() == "two"; }); EXPECT_EQ(c, 1); auto match = std::ranges::find_if(r, [](auto const & n) { return n.name() == "two"; }); EXPECT_EQ(match->name(), "two"); auto match2 = std::ranges::find_if(doc.first_node()->attributes(), [](auto const & n) { return n.name() == "two"; }); EXPECT_EQ(match2->name(), "two"); } ================================================ FILE: test/src/low-level-parse.cpp ================================================ // // Created by dave on 05/07/2024. 
// #include #include TEST(Constants, Empty) { flxml::xml_document<> doc; auto empty = doc.nullstr(); EXPECT_EQ(empty, ""); EXPECT_EQ(empty.size(), 0); } TEST(Predicates, Skip) { std::string test_data{""}; auto start = test_data.c_str(); auto end = ++start; flxml::xml_document<>::skip::element_name_pred,0>(end); EXPECT_EQ(*end, '/'); std::string_view sv({start, end}); EXPECT_EQ(sv, "simple"); } TEST(PredicateBuffer, Skip) { std::string test_data{""}; auto start = flxml::buffer_ptr(test_data); auto end = ++start; flxml::xml_document<>::skip::element_name_pred,0>(end); EXPECT_EQ(*end, '/'); std::string_view sv({start, end}); EXPECT_EQ(sv, "simple"); } TEST(Predicates, SkipAndExpand) { std::string test_data{"&hello;<"}; char * start = const_cast(test_data.c_str()); auto end = flxml::xml_document<>::skip_and_expand_character_refs< flxml::xml_document<>::text_pred, flxml::xml_document<>::text_pure_with_ws_pred, flxml::parse_no_entity_translation>(start); EXPECT_EQ(*end, '<'); } TEST(Predicates, SkipAndExpandShort) { std::string test_data{"&hello;"}; char * start = const_cast(test_data.c_str()); auto end = flxml::xml_document<>::skip_and_expand_character_refs< flxml::xml_document<>::text_pred, flxml::xml_document<>::text_pure_with_ws_pred, flxml::parse_no_entity_translation>(start); EXPECT_EQ(*end, '\0'); } TEST(Predicates, SkipAndExpandShorter) { std::string test_data{"&hell"}; char * start = const_cast(test_data.c_str()); auto end = flxml::xml_document<>::skip_and_expand_character_refs< flxml::xml_document<>::text_pred, flxml::xml_document<>::text_pure_with_ws_pred, flxml::parse_no_entity_translation>(start); EXPECT_EQ(*end, '\0'); } TEST(ParseFns, ParseBom) { std::string test_data{"\xEF\xBB\xBF"}; char *start = const_cast(test_data.c_str()); flxml::xml_document<> doc; doc.parse_bom<0>(start); EXPECT_EQ(*start, '<'); } TEST(ParseFns, ParseBomShort) { std::string test_data{"\xEF\xBB\xBF"}; char *start = const_cast(test_data.c_str()); flxml::xml_document<> doc; 
doc.parse_bom<0>(start); EXPECT_EQ(*start, '\0'); } TEST(ParseFns, ParseBomShorter) { std::string test_data{"\xEF\xBB"}; char *start = const_cast(test_data.c_str()); flxml::xml_document<> doc; doc.parse_bom<0>(start); EXPECT_EQ(*start, '\xEF'); } ================================================ FILE: test/src/main.cc ================================================ // // Created by dave on 30/07/2024. // #include "gtest/gtest.h" #ifdef DWD_GTEST_SENTRY #include class EventListener : public ::testing::TestEventListener { sentry_transaction_context_t *tx_ctx = nullptr; sentry_transaction_t *tx = nullptr; sentry_span_t *main_span = nullptr; sentry_span_t *suite_span = nullptr; sentry_span_t *test_span = nullptr; std::string const & m_progname; public: EventListener(std::string const & progname) : m_progname(progname) {} ~EventListener() override = default; // Override this to define how to set up the environment. void OnTestProgramStart(const ::testing::UnitTest & u) override { sentry_options_t *options = sentry_options_new(); sentry_options_set_traces_sample_rate(options, 1.0); sentry_init(options); } void OnTestProgramEnd(const ::testing::UnitTest &) override { sentry_shutdown(); } void OnTestStart(::testing::TestInfo const & test_info) override { const char * testName = test_info.name(); std::string tname = test_info.test_suite_name(); tname += "."; tname += testName; test_span = sentry_span_start_child( suite_span, "test", tname.c_str() ); } // Override this to define how to tear down the environment. 
void OnTestEnd(const ::testing::TestInfo & ti) override { if (ti.result()->Failed()) { sentry_span_set_status(test_span, sentry_span_status_t::SENTRY_SPAN_STATUS_INTERNAL_ERROR); } sentry_span_finish(test_span); // Mark the span as finished } void OnTestIterationStart(const testing::UnitTest &unit_test, int iteration) override { tx_ctx = sentry_transaction_context_new( m_progname.c_str(), "googletest" ); tx = sentry_transaction_start(tx_ctx, sentry_value_new_null()); main_span = sentry_transaction_start_child( tx, "googletest", m_progname.c_str() ); } void OnEnvironmentsSetUpStart(const testing::UnitTest &unit_test) override { } void OnEnvironmentsSetUpEnd(const testing::UnitTest &unit_test) override { } void OnTestSuiteStart(const testing::TestSuite &suite) override { suite_span = sentry_span_start_child( main_span, "test.suite", suite.name() ); TestEventListener::OnTestSuiteStart(suite); } void OnTestCaseStart(const testing::TestCase &aCase) override { TestEventListener::OnTestCaseStart(aCase); } void OnTestDisabled(const testing::TestInfo &info) override { TestEventListener::OnTestDisabled(info); } void OnTestPartResult(const testing::TestPartResult &test_part_result) override { sentry_set_span(test_span); auto val = sentry_value_new_breadcrumb("test", test_part_result.message()); sentry_add_breadcrumb(val); if (test_part_result.failed()) { auto ev = sentry_value_new_event(); auto exc = sentry_value_new_exception("GoogleTest", test_part_result.message()); sentry_value_set_stacktrace(exc, nullptr, 0); sentry_event_add_exception(ev, exc); sentry_capture_event(ev); } } void OnTestSuiteEnd(const testing::TestSuite &suite) override { TestEventListener::OnTestSuiteEnd(suite); if (suite.failed_test_count() > 0) { sentry_span_set_status(suite_span, sentry_span_status_t::SENTRY_SPAN_STATUS_INTERNAL_ERROR); } sentry_span_finish(suite_span); // Mark the span as finished } void OnTestCaseEnd(const testing::TestCase &aCase) override { TestEventListener::OnTestCaseEnd(aCase); 
} void OnEnvironmentsTearDownStart(const testing::UnitTest &unit_test) override { } void OnEnvironmentsTearDownEnd(const testing::UnitTest &unit_test) override { } void OnTestIterationEnd(const testing::UnitTest &unit_test, int iteration) override { if (unit_test.failed_test_count() > 0) { sentry_span_set_status(main_span, sentry_span_status_t::SENTRY_SPAN_STATUS_INTERNAL_ERROR); sentry_transaction_set_status(tx, sentry_span_status_t::SENTRY_SPAN_STATUS_INTERNAL_ERROR); } sentry_span_finish(main_span); // Mark the span as finished sentry_transaction_finish(tx); } }; #endif int main(int argc, char ** argv) { std::string progname(argv[0]); auto slash = progname.find_last_of("/\\"); if (slash != std::string::npos) { progname = progname.substr(slash + 1); } ::testing::InitGoogleTest(&argc, argv); auto & listeners = ::testing::UnitTest::GetInstance()->listeners(); #ifdef DWD_GTEST_SENTRY listeners.Append(new EventListener(progname)); #endif auto ret = RUN_ALL_TESTS(); return ret; } ================================================ FILE: test/src/manipulations.cpp ================================================ // // Created by dwd on 01/07/24. 
// #include #include #include namespace { auto print(flxml::xml_document<> & doc) { std::string output; flxml::print(std::back_inserter(output), *doc.first_node()); return output; } } TEST(Create, Node) { flxml::xml_document<> doc; auto node = doc.allocate_node(flxml::node_element, "fish", "cakes"); doc.append_node(node); EXPECT_EQ( print(doc), "cakes\n" ); } TEST(Create, NodeEmpty) { flxml::xml_document<> doc; auto node = doc.allocate_node(flxml::node_element, "fish"); doc.append_node(node); EXPECT_EQ( print(doc), "\n" ); } TEST(Create, Node2) { flxml::xml_document<> doc; auto node = doc.allocate_node(flxml::node_element, "fish", "cakes"); doc.append_node(node); EXPECT_EQ( print(doc), "cakes\n" ); } static std::string s = "tuna"; std::string const & fn() { return s; } TEST(Create, NodeAttr) { flxml::xml_document<> doc; auto node = doc.allocate_node(flxml::node_element, "fish", "cakes"); auto haddock = doc.allocate_attribute("id", "haddock"); node->append_attribute(haddock); doc.append_node(node); EXPECT_EQ( print(doc), "cakes\n" ); const std::string & s2 = fn(); const flxml::xml_attribute<>::view_type & sv{s2}; auto tuna = doc.allocate_attribute("not-id", fn()); // These check that the same buffer is being used throughout, instead of creating temporaries. 
EXPECT_EQ(s.data(), s2.data()); EXPECT_EQ(s.data(), sv.data()); EXPECT_EQ(s.data(), tuna->value().data()); node->append_attribute(tuna); EXPECT_EQ(haddock->next_attribute(), tuna); EXPECT_EQ(tuna->parent(), node); EXPECT_EQ( print(doc), "cakes\n" ); node->remove_attribute(tuna); EXPECT_EQ(haddock->next_attribute(), nullptr); EXPECT_EQ(tuna->parent(), nullptr); EXPECT_EQ( print(doc), "cakes\n" ); node->prepend_attribute(tuna); EXPECT_EQ( print(doc), "cakes\n" ); node->value("pie"); EXPECT_EQ( print(doc), "pie\n" ); node->remove_all_attributes(); EXPECT_EQ( print(doc), "pie\n" ); auto child = node->append_element({"urn:xmpp:fish:0", "shark"}, fn()); EXPECT_EQ(s.data(), child->value().data()); EXPECT_EQ( print(doc), "\n\ttuna\n\n" ); child->append_element({"urn:xmpp:fish:0", "species"}, "tiger"); EXPECT_EQ( print(doc), "\n\t\n\t\ttiger\n\t\n\n" ); } ================================================ FILE: test/src/parse-simple.cpp ================================================ // // Created by dwd on 1/13/24. 
// #include #include TEST(Parser, SingleElement) { char doc_text[] = ""; flxml::xml_document<> doc; doc.parse<0>(doc_text); auto node = doc.first_node(); EXPECT_NE(nullptr, node); EXPECT_FALSE(node->name().empty()); EXPECT_EQ("single-element", node->name()); doc.validate(); } TEST(Parser, DefaultElementNS) { char doc_text[] = ""; flxml::xml_document<> doc; doc.parse(doc_text); auto node = doc.first_node(); EXPECT_NE(nullptr, node); EXPECT_FALSE(node->name().empty()); EXPECT_EQ("element", node->name()); EXPECT_EQ(node->xmlns(), "this"); auto child = node->first_node(); EXPECT_EQ(child->name(), "child"); EXPECT_EQ(child->xmlns(), "this"); doc.validate(); auto no_node = child->next_sibling(); EXPECT_THROW(no_node->xmlns(), flxml::no_such_node); } TEST(Parser, UnboundPrefix) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse<0>(doc_text); auto node = doc.first_node(); EXPECT_EQ("single-element", node->name()); EXPECT_THROW( doc.validate(), flxml::element_xmlns_unbound ); } TEST(Parser, DuplicateAttribute) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse<0>(doc_text); auto node = doc.first_node(); EXPECT_EQ("single-element", node->name()); EXPECT_THROW( doc.validate(), flxml::duplicate_attribute ); } TEST(Parser, UnboundAttrPrefix) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse<0>(doc_text); auto node = doc.first_node(); EXPECT_EQ("single-element", node->name()); auto attr = node->first_attribute(); EXPECT_THROW( doc.validate(), flxml::attr_xmlns_unbound ); EXPECT_THROW( attr->xmlns(), flxml::attr_xmlns_unbound ); } TEST(Parser, DuplicateAttrPrefix) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse<0>(doc_text); auto node = doc.first_node(); assert(std::string("single-element") == node->name()); EXPECT_THROW( doc.validate(), flxml::duplicate_attribute ); } TEST(Parser, Xmlns) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse<0>(doc_text); auto node = doc.first_node(); EXPECT_EQ("single", node->name()); 
EXPECT_EQ("pfx", node->prefix()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); doc.validate(); } TEST(Parser, ChildXmlns) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse<0>(doc_text); auto node = doc.first_node(); EXPECT_EQ("single", node->name()); auto child = node->first_node({}, "urn:potato"); ASSERT_NE(nullptr, child); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:potato", child->xmlns()); child = node->first_node(); EXPECT_EQ("firstchild", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); child = child->next_sibling(); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:potato", child->xmlns()); child = child->next_sibling(); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); child = node->first_node("child"); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); child = node->first_node()->next_sibling({}, "urn:xmpp:example"); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); child = node->first_node()->next_sibling("child"); // This will default to the same namespace as the first child ndoe. 
EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); auto attr = node->first_attribute(); EXPECT_EQ(attr->xmlns(), "http://www.w3.org/2000/xmlns/"); EXPECT_EQ(attr->local_name(), "pfx"); EXPECT_EQ(attr->name(), "xmlns:pfx"); EXPECT_EQ(attr->value(), "urn:xmpp:example"); attr = attr->next_attribute(); EXPECT_EQ(attr->xmlns(), ""); EXPECT_EQ(attr->local_name(), "foo"); EXPECT_EQ(attr->name(), "foo"); EXPECT_EQ(attr->value(), "bar"); doc.validate(); } TEST(Parser, HandleEOF){ flxml::xml_document<> doc; char doc_text[] = ""; EXPECT_THROW( doc.parse<0>(doc_text), flxml::eof_error ); } TEST(ParseOptions, Fastest) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse(doc_text); auto node = doc.first_node(); EXPECT_EQ("single", node->name()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); auto child = node->first_node({}, "urn:potato"); ASSERT_NE(nullptr, child); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:potato", child->xmlns()); child = node->first_node(); EXPECT_EQ("firstchild", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); child = node->first_node("child"); EXPECT_EQ("child", child->name()); EXPECT_EQ("urn:xmpp:example", child->xmlns()); doc.validate(); } TEST(ParseOptions, OpenOnly) { flxml::xml_document<> doc; char doc_text[] = ""; doc.parse(doc_text); auto node = doc.first_node(); EXPECT_EQ("single", node->name()); EXPECT_EQ("pfx", node->prefix()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); doc.validate(); } TEST(ParseOptions, ParseOne) { flxml::xml_document<> doc; char doc_text[] = "Hello!"; const char * text = doc.parse(doc_text); { auto node = doc.first_node(); EXPECT_EQ("single", node->name()); EXPECT_EQ("pfx", node->prefix()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); EXPECT_STREQ( "Hello!", text); } doc.validate(); unsigned counter = 0; while (*text) { flxml::xml_document<> subdoc; text = subdoc.parse(text, &doc); auto node = subdoc.first_node(); ASSERT_NE(nullptr, node); switch(++counter) { 
case 1: EXPECT_EQ("features", node->name()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); break; case 2: EXPECT_EQ("message", node->name()); EXPECT_EQ("jabber:client", node->xmlns()); break; default: FAIL(); } subdoc.validate(); } } TEST(ParseOptions, OpenOnlyFastest) { flxml::xml_document<> doc; char doc_text[] = "Hello!"; const char * text = doc.parse(doc_text); { auto node = doc.first_node(); EXPECT_EQ("single", node->name()); EXPECT_EQ("pfx", node->prefix()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); EXPECT_STREQ( "Hello!", text); } doc.validate(); unsigned counter = 0; while (*text) { flxml::xml_document<> subdoc; text = subdoc.parse(text, &doc); auto node = subdoc.first_node(); ASSERT_NE(nullptr, node); switch(++counter) { case 1: EXPECT_EQ("features", node->name()); EXPECT_EQ("urn:xmpp:example", node->xmlns()); break; case 2: EXPECT_EQ("message", node->name()); EXPECT_EQ("jabber:client", node->xmlns()); break; default: FAIL(); } subdoc.validate(); } } TEST(Parser_Emoji, Single) { std::string foo{"'"}; flxml::xml_document<> doc; doc.parse(foo); EXPECT_EQ("'", doc.first_node()->value()); } TEST(Parser_Emoji, SingleUni) { std::string foo{"Ӓ"}; flxml::xml_document<> doc; doc.parse(foo); EXPECT_EQ("\xD3\x92", doc.first_node()->value()); } TEST(Parser_Emoji, SingleEmoji) { std::string foo{"😀"}; flxml::xml_document<> doc; doc.parse(foo); EXPECT_EQ("\xF0\x9F\x98\x80", doc.first_node()->value()); EXPECT_EQ(4, doc.first_node()->value().size()); } TEST(Parser_Emoji, SingleEmojiReuse) { std::string bar("Sir I bear a rhyme excelling in mystic verse and magic spelling 😀"); flxml::xml_document<> doc; flxml::xml_document<> parent_doc; parent_doc.parse(""); doc.parse(bar, &parent_doc); EXPECT_EQ("Sir I bear a rhyme excelling in mystic verse and magic spelling \xF0\x9F\x98\x80", doc.first_node()->value()); auto doc_a = doc.first_node()->document(); doc.first_node()->value(doc_a->allocate_string("Sausages are the loneliest fruit, and are but one of the strange things I 
have witnessed in my long and interesting life.")); EXPECT_EQ("Sausages are the loneliest fruit, and are but one of the strange things I have witnessed in my long and interesting life.", doc.first_node()->value()); bar = "😀"; doc.parse(bar, &parent_doc); EXPECT_EQ("\xF0\x9F\x98\x80", doc.first_node()->value()); EXPECT_EQ(4, doc.first_node()->value().size()); } ================================================ FILE: test/src/perf.cpp ================================================ // // Created by dave on 07/07/2024. // #include #include #include #include #include "flxml/print.h" #include "flxml/iterators.h" const auto xml_sample_file = "REC-xml-20081126.xml"; #ifdef RAPIDXML_PERF_TESTS #define PERF_TEST() (void)0 #else #define PERF_TEST() GTEST_SKIP() << "Skipping performance test" #endif TEST(Perf, Parse) { using std::chrono::high_resolution_clock; using std::chrono::duration_cast; using std::chrono::microseconds; PERF_TEST(); flxml::file source(xml_sample_file); std::vector timings; for (auto i = 0; i != 1000; ++i) { flxml::xml_document<> doc; auto t1 = high_resolution_clock::now(); doc.parse(source.data()); auto t2 = high_resolution_clock::now(); auto ms_int = duration_cast(t2 - t1); timings.push_back(ms_int.count()); } auto total = 0ULL; for (auto t : timings) { total += t / 1000; } std::cout << "Execution time: " << total << " us\n"; } TEST(Perf, Parse2) { using std::chrono::high_resolution_clock; using std::chrono::duration_cast; using std::chrono::microseconds; PERF_TEST(); flxml::file source(xml_sample_file); std::vector timings; for (auto i = 0; i != 1000; ++i) { flxml::xml_document<> doc; std::string_view sv{source.data(), source.size() - 1}; // Drop the NUL. 
auto t1 = high_resolution_clock::now(); doc.parse(sv); auto t2 = high_resolution_clock::now(); auto ms_int = duration_cast(t2 - t1); timings.push_back(ms_int.count()); } auto total = 0ULL; for (auto t : timings) { total += t / 1000; } std::cout << "Execution time: " << total << " us\n"; } TEST(Perf, PrintClean) { using std::chrono::high_resolution_clock; using std::chrono::duration_cast; using std::chrono::microseconds; PERF_TEST(); flxml::file source(xml_sample_file); std::vector timings; flxml::xml_document<> doc; doc.parse(source.data()); for (auto i = 0; i != 1000; ++i) { std::string output; auto t1 = high_resolution_clock::now(); flxml::print(std::back_inserter(output), doc); auto t2 = high_resolution_clock::now(); auto ms_int = duration_cast(t2 - t1); timings.push_back(ms_int.count()); } auto total = 0ULL; for (auto t : timings) { total += t / 1000; } std::cout << "Execution time: " << total << " us\n"; } TEST(Perf, PrintDirty) { using std::chrono::high_resolution_clock; using std::chrono::duration_cast; using std::chrono::microseconds; PERF_TEST(); flxml::file source(xml_sample_file); std::vector timings; flxml::xml_document<> doc_o; doc_o.parse(source.data()); flxml::xml_document<> doc; for (auto & child : flxml::children(doc_o)) { doc.append_node(doc.clone_node(&child, true)); } for (auto i = 0; i != 1000; ++i) { std::string output; auto t1 = high_resolution_clock::now(); flxml::print(std::back_inserter(output), doc); auto t2 = high_resolution_clock::now(); auto ms_int = duration_cast(t2 - t1); timings.push_back(ms_int.count()); } auto total = 0ULL; for (auto t : timings) { total += t / 1000; } std::cout << "Execution time: " << total << " us\n"; } ================================================ FILE: test/src/round-trips.cpp ================================================ // // Created by dave on 04/07/2024. 
// #include #include #include namespace { auto print(flxml::xml_document<> & doc) { std::string output; flxml::print(std::back_inserter(output), doc, flxml::print_no_indenting); return output; } } TEST(RoundTrip, Simple) { const char input[] = ""; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); // Have we parsed correctly? EXPECT_EQ(input, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); } TEST(RoundTrip, SimpleMod) { const char input[] = ""; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); // Have we parsed correctly? EXPECT_EQ(input, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); auto that = doc.first_node()->first_node(); doc.first_node()->remove_node(that); auto output2 = print(doc); EXPECT_EQ(output2, ""); std::string xmlns = "that"; std::string name = "this"; auto check = doc.first_node()->append_element(name, "the other"); doc.first_node()->append_element(name, "another'"); EXPECT_EQ(name, "this"); EXPECT_EQ(check->name(), name); EXPECT_EQ(check->name().data(), name.data()); doc.first_node()->append_element("odd", "the other"); doc.first_node()->append_element({xmlns, name}, "the other"); EXPECT_EQ(name, "this"); EXPECT_EQ(check->name(), name); EXPECT_EQ(check->name().data(), name.data()); doc.first_node()->append_element({"this", "that"}, "the other"); doc.first_node()->append_element(name, "last time"); EXPECT_EQ(name, "this"); EXPECT_EQ(check->name(), name); EXPECT_EQ(check->name().data(), name.data()); auto output3 = print(doc); EXPECT_EQ(name, "this"); EXPECT_EQ(check->name(), name); EXPECT_EQ(check->name().data(), name.data()); EXPECT_EQ(output3, "the otheranother'the otherthe otherthe otherlast time"); flxml::xml_document<> doc2; 
doc2.clone_node(doc.first_node(), true); auto output4 = print(doc); EXPECT_EQ(output3, output4); } TEST(RoundTrip, SimpleApos) { const char input[] = ""; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); // Have we parsed correctly? flxml::xml_document<> doc2; for (auto & child : flxml::children(doc)) { doc2.append_node(doc2.clone_node(&child, true)); } EXPECT_EQ(input, print(doc2)); EXPECT_EQ(input, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); } TEST(RoundTrip, SimpleApos2) { const char input[] = ""; const char expected[] = ""; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); EXPECT_EQ(doc.first_node()->first_attribute()->value(), "'"); // Have we parsed correctly? flxml::xml_document<> doc2; for (auto & child : flxml::children(doc)) { doc2.append_node(doc2.clone_node(&child, true)); } EXPECT_EQ(expected, print(doc2)); EXPECT_EQ(expected, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); } TEST(RoundTrip, SimpleLtBody) { const char input[] = "<"; const char expected[] = "<"; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); EXPECT_EQ(doc.first_node()->value(), "<"); EXPECT_EQ(doc.first_node()->first_attribute()->value(), "'"); // Have we parsed correctly? flxml::xml_document<> doc2; for (auto & child : flxml::children(doc)) { doc2.append_node(doc2.clone_node(&child, true)); } EXPECT_EQ(expected, print(doc2)); EXPECT_EQ(expected, output); // Have we mutated the underlying buffer? 
EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); } TEST(RoundTrip, MutateBody) { const char input[] = "<"; const char expected[] = "<"; const char expected2[] = "new value"; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); EXPECT_EQ(expected, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); doc.first_node()->value("new value"); EXPECT_EQ(doc.first_node()->value_raw(), ""); EXPECT_EQ(doc.first_node()->value(), "new value"); EXPECT_EQ(expected2, print(doc)); } TEST(RoundTrip, Everything) { const char input[] = ""; const char expected[] = ""; std::vector buffer{input, input + sizeof(input)}; flxml::xml_document<> doc; doc.parse(buffer.data()); auto output = print(doc); flxml::xml_document<> doc2; for (auto & child : flxml::children(doc)) { doc2.append_node(doc2.clone_node(&child, true)); } EXPECT_EQ(expected, print(doc2)); // Have we parsed correctly? EXPECT_EQ(expected, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size() - 1)); } TEST(RoundTrip, EverythingStream) { const char input[] = ""; const char expected[] = "\n\n\n\t\n\n\n"; std::vector buffer{input, input + sizeof(input) - 1}; flxml::xml_document<> doc; doc.parse(buffer); std::stringstream ss1; ss1 << doc; auto output = ss1.str(); flxml::xml_document<> doc2; for (auto & child : doc.children()) { doc2.append_node(doc2.clone_node(&child, true)); } std::stringstream ss2; ss2 << doc2; EXPECT_EQ(expected, ss2.str()); // Have we parsed correctly? EXPECT_EQ(expected, output); // Have we mutated the underlying buffer? EXPECT_EQ(input, std::string(buffer.data(), buffer.size())); } ================================================ FILE: test/src/xpath.cpp ================================================ // // Created by dave on 29/07/2024. 
// #include #include TEST(XPath, parse) { std::string xpath_string = "//"; std::string_view sv{xpath_string}; auto xp = flxml::xpath<>::parse(sv); EXPECT_EQ(sv.length(), 0); EXPECT_NE(xp.get(), nullptr); EXPECT_EQ(xp->chain().size(), 1); } TEST(XPath, parse2) { std::string xpath_string = "//child"; std::string_view sv{xpath_string}; auto xp = flxml::xpath<>::parse(sv); EXPECT_EQ(sv.length(), 0); EXPECT_NE(xp.get(), nullptr); EXPECT_EQ(xp->chain().size(), 2); } TEST(XPath, parse1) { std::string xpath_string = "/child"; std::string_view sv{xpath_string}; auto xp = flxml::xpath<>::parse(sv); EXPECT_EQ(sv.length(), 0); EXPECT_NE(xp.get(), nullptr); EXPECT_EQ(xp->chain().size(), 2); } TEST(XPath, parse3) { std::string xpath_string = "//child[another/element]/something"; std::string_view sv{xpath_string}; auto xp = flxml::xpath<>::parse(sv); EXPECT_EQ(sv.length(), 0); EXPECT_NE(xp.get(), nullptr); EXPECT_EQ(xp->chain().size(), 4); ASSERT_EQ(xp->chain()[1]->contexts().size(), 1); EXPECT_EQ(xp->chain()[1]->contexts().begin()->get()->chain().size(), 4); } TEST(XPath, parse4) { std::string xpath_string = ""; std::string_view sv{xpath_string}; EXPECT_THROW( flxml::xpath<>::parse(sv), std::runtime_error ); } TEST(XPath, parse_attr) { std::string xpath_string = "//child[@foo='bar']/something"; std::string_view sv{xpath_string}; auto xp = flxml::xpath<>::parse(sv); EXPECT_EQ(sv.length(), 0); EXPECT_NE(xp.get(), nullptr); EXPECT_EQ(xp->chain().size(), 4); ASSERT_EQ(xp->chain()[1]->contexts().size(), 1); EXPECT_EQ(xp->chain()[1]->contexts().begin()->get()->chain().size(), 1); } TEST(XPath, parse_text) { std::string xpath_string = "//child[text()='bar']/something"; std::string_view sv{xpath_string}; auto xp = flxml::xpath<>::parse(sv); EXPECT_EQ(sv.length(), 0); EXPECT_NE(xp.get(), nullptr); EXPECT_EQ(xp->chain().size(), 4); ASSERT_EQ(xp->chain()[1]->contexts().size(), 1); EXPECT_EQ(xp->chain()[1]->contexts().begin()->get()->chain().size(), 1); } TEST(XPathFirst, simple_all) { 
flxml::xml_document<> doc; doc.parse(""); std::string xpath = "//"; std::string_view sv{xpath}; auto xp = flxml::xpath<>::parse(sv); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->type(), flxml::node_type::node_document); } TEST(XPathFirst, simple_any) { flxml::xml_document<> doc; doc.parse(""); std::string xpath = "//child"; std::string_view sv{xpath}; auto xp = flxml::xpath<>::parse(sv); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "child"); } TEST(XPathFirst, simple_sub) { flxml::xml_document<> doc; doc.parse(""); std::string xpath = "//[child]"; std::string_view sv{xpath}; auto xp = flxml::xpath<>::parse(sv); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "simple"); } TEST(XPathFirst, simple_attr) { flxml::xml_document<> doc; doc.parse("foobar"); std::string xpath = "//child[@attr='val2']"; std::string_view sv{xpath}; auto xp = flxml::xpath<>::parse(sv); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "child"); EXPECT_EQ(r->value(), "bar"); } TEST(XPathFirst, simple_text) { flxml::xml_document<> doc; doc.parse("foobar"); auto xp = flxml::xpath<>::parse("//child[text()='bar']"); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "child"); EXPECT_EQ(r->value(), "bar"); } TEST(XPathNS, simple_text) { flxml::xml_document<> doc; doc.parse("foobar"); auto xp = flxml::xpath<>::parse("//child[text()='bar']"); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "child"); EXPECT_EQ(r->value(), "bar"); } TEST(XPathNS, xmlns_text) { flxml::xml_document<> doc; doc.parse("foobar"); std::map xmlns = { {"x1", "p2"}, {"x2", "p1"} }; auto xp = flxml::xpath<>::parse(xmlns,"//x1:child[text()='bar']"); auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "child"); EXPECT_EQ(r->value(), "bar"); } TEST(XPathNS, xmlns_both) { flxml::xml_document<> doc; doc.parse("foobar"); std::map xmlns = { {"x1", "p2"}, {"x2", "p1"} }; auto xp = flxml::xpath<>::parse(xmlns,"//x1:child[text()='bar'][@attr='val2']"); 
auto r = xp->first(doc); ASSERT_TRUE(r); EXPECT_EQ(r->name(), "child"); EXPECT_EQ(r->value(), "bar"); } TEST(XPathNS, xmlns_text_miss) { flxml::xml_document<> doc; doc.parse("foobar"); std::map xmlns = { {"x1", "p2"}, {"x2", "p1"} }; auto xp = flxml::xpath<>::parse(xmlns,"//x2:child[text()='bar']"); auto r = xp->first(doc); ASSERT_FALSE(r); }