Skip to content

packages/engine/scram-node/src/xml.h

XML helper facilities to work with libxml2. More...

Namespaces

Name
scram
scram::xml
scram::xml::detail

Classes

Name
class scram::xml::Element — XML Element adaptor.
class scram::xml::Element::Range — The range for elements.
class scram::xml::Element::Range::iterator — Iterator over range elements.
class scram::xml::Document — XML DOM tree document.
class scram::xml::Validator — RelaxNG validator.

Detailed Description

XML helper facilities to work with libxml2.

Note:

  • All strings and characters are UTF-8 unless otherwise documented.
  • The facilities are designed specifically for SCRAM use cases. The XML is assumed to be well-formed and simple.
  • Older versions of libxml2 are not const-correct in their API.

Warning: Complex XML features are not handled or expected, for example, DTD, namespaces, entities.

Adaptors and helper functions provide read-only facilities.

Source code

cpp
/*
 * Copyright (C) 2014-2018 Olzhas Rakhimov
 * Copyright (C) 2023 OpenPRA ORG Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


#pragma once

#include <cmath>
#include <cstdint>
#include <cstdlib>

#include <iterator>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <type_traits>

#include <boost/exception/errinfo_at_line.hpp>
#include <boost/exception/errinfo_errno.hpp>
#include <boost/exception/errinfo_file_name.hpp>
#include <boost/exception/errinfo_file_open_mode.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/range/adaptor/filtered.hpp>

#include <libxml/parser.h>
#include <libxml/relaxng.h>
#include <libxml/tree.h>

#include "error.h"

namespace scram::xml {

namespace detail {  // Internal XML helper functions.

/// Interprets a textual XML value as a number or a boolean.
///
/// @tparam T  The target type: int, double, or bool.
///
/// @param[in] value  The text to interpret.
///                   The view may be empty (including a default-constructed
///                   view with null data), and it does not need to be
///                   null-terminated.
///
/// @returns The interpreted value.
///
/// @throws ValidityError  The text is empty, it is not consumed completely
///                        by the conversion, or the result is out of range.
template <typename T>
std::enable_if_t<std::is_arithmetic_v<T>, T> to(const std::string_view& value) {
  if constexpr (std::is_same_v<T, int>) {
    // strtoll needs a valid null-terminated string.
    // A string view guarantees neither; moreover, trim() yields a view
    // with null data for blank input, so the parsing works on a copy.
    const std::string buffer(value);
    char* end_char = nullptr;
    const std::int64_t ret = std::strtoll(buffer.c_str(), &end_char, 10);
    const bool consumed =
        !buffer.empty() && end_char == buffer.c_str() + buffer.size();
    if (!consumed || ret > std::numeric_limits<int>::max() ||
        ret < std::numeric_limits<int>::min()) {
      SCRAM_THROW(ValidityError("Failed to interpret value to int"))
          << errinfo_value(buffer);
    }
    return static_cast<int>(ret);

  } else if constexpr (std::is_same_v<T, double>) {  // NOLINT
    // The same null-termination concerns as with the integer conversion.
    const std::string buffer(value);
    char* end_char = nullptr;
    const double ret = std::strtod(buffer.c_str(), &end_char);
    const bool consumed =
        !buffer.empty() && end_char == buffer.c_str() + buffer.size();
    // strtod reports overflow with +/-HUGE_VAL.
    if (!consumed || ret == HUGE_VAL || ret == -HUGE_VAL) {
      SCRAM_THROW(ValidityError("Failed to interpret value to double"))
          << errinfo_value(buffer);
    }
    return ret;

  } else {
    static_assert(std::is_same_v<T, bool>, "Only default numeric types.");

    // Only these four spellings are accepted as boolean values.
    if (value == "true" || value == "1")
      return true;
    if (value == "false" || value == "0")
      return false;
    SCRAM_THROW(ValidityError("Failed to interpret value to bool"))
        << errinfo_value(std::string(value));
  }
}

/// Reinterprets a libxml2 string as a C string without copying.
///
/// @param[in] xml_string  The libxml2 string; it must not be null.
///
/// @returns The same pointer viewed as const char*.
inline const char* from_utf8(const xmlChar* xml_string) noexcept {
  assert(xml_string);
  const auto* c_string = reinterpret_cast<const char*>(xml_string);
  return c_string;
}

/// Reinterprets a C string as a libxml2 string without copying.
///
/// @param[in] c_string  The C string; it must not be null.
///
/// @returns The same pointer viewed as const xmlChar*.
inline const xmlChar* to_utf8(const char* c_string) noexcept {
  assert(c_string);
  const auto* xml_string = reinterpret_cast<const xmlChar*>(c_string);
  return xml_string;
}

/// Strips leading and trailing space characters (' ') from a text view.
///
/// @param[in] text  The text to trim; the underlying characters are not copied.
///
/// @returns A view into the same characters without the surrounding spaces.
///          A default-constructed view if the text is empty or all spaces.
///
/// @note Only the space character is removed;
///       tabs, new-lines, and other white-space are left in place.
inline std::string_view trim(const std::string_view& text) noexcept {
  constexpr char kSpace = ' ';

  const std::string_view::size_type first = text.find_first_not_of(kSpace);
  if (first == std::string_view::npos)
    return std::string_view();

  // At least one non-space character exists, so the search cannot fail here.
  const std::string_view::size_type last = text.find_last_not_of(kSpace);
  return text.substr(first, last - first + 1);
}

template <typename T>
T GetError(xmlErrorPtr xml_error = nullptr) {
  if (!xml_error)
    xml_error = xmlGetLastError();
  assert(xml_error && "No XML error is available.");
  T throw_error(xml_error->message);
  if (xml_error->file)
    throw_error << boost::errinfo_file_name(xml_error->file);
  if (xml_error->line)
    throw_error << boost::errinfo_at_line(xml_error->line);
  return throw_error;
}

}  // namespace detail

class Element {
 public:
  class Range {
   public:
    using value_type = Element;  

    class iterator
        : public boost::iterator_facade<iterator, Element,
                                        std::forward_iterator_tag, Element> {
      friend class boost::iterator_core_access;

     public:
      explicit iterator(const xmlElement* element = nullptr)
          : element_(element) {}

     private:
      void increment() {
        assert(element_ && "Incrementing end iterator!");
        element_ = Range::findElement(element_->next);
      }
      bool equal(const iterator& other) const {
        return element_ == other.element_;
      }
      Element dereference() const { return Element(element_); }

      const xmlElement* element_;  
    };

    using const_iterator = iterator;  

    explicit Range(const xmlNode* head) : begin_(findElement(head)) {}

    iterator begin() const { return begin_; }
    iterator end() const { return iterator(); }
    iterator cbegin() const { return begin_; }
    iterator cend() const { return iterator(); }

    bool empty() const { return begin() == end(); }

    std::size_t size() const { return std::distance(begin(), end()); }

   private:
    static const xmlElement* findElement(const xmlNode* node) noexcept {
      while (node && node->type != XML_ELEMENT_NODE)
        node = node->next;
      return reinterpret_cast<const xmlElement*>(node);
    }

    iterator begin_;  
  };

  explicit Element(const xmlElement* element) : element_(element) {
    assert(element_);
  }

  const char* filename() const { return detail::from_utf8(element_->doc->URL); }

  int line() const { return XML_GET_LINE(to_node()); }

  std::string_view name() const { return detail::from_utf8(element_->name); }

  bool has_attribute(const char* name) const {
    return xmlHasProp(to_node(), detail::to_utf8(name)) != nullptr;
  }

  std::string_view attribute(const char* name) const {
    const xmlAttr* property = xmlHasProp(to_node(), detail::to_utf8(name));
    if (!property)
      return {};
    const xmlNode* text_node = property->children;
    assert(text_node && text_node->type == XML_TEXT_NODE);
    assert(text_node->content);
    return detail::trim(detail::from_utf8(text_node->content));
  }

  template <typename T>
  std::enable_if_t<std::is_arithmetic_v<T>, std::optional<T>>
  attribute(const char* name) const {
    std::string_view value = attribute(name);
    if (value.empty())
      return {};
    try {
      return detail::to<T>(value);
    } catch (ValidityError& err) {
      err << errinfo_element(std::string(Element::name()))
          << errinfo_attribute(name) << boost::errinfo_at_line(line())
          << boost::errinfo_file_name(filename());
      throw;
    }
  }

  std::string_view text() const {
    const xmlNode* text_node = element_->children;
    while (text_node && text_node->type != XML_TEXT_NODE)
      text_node = text_node->next;
    assert(text_node && "Element does not have text.");
    assert(text_node->content && "Missing text in Element.");
    return detail::trim(detail::from_utf8(text_node->content));
  }

  template <typename T>
  std::enable_if_t<std::is_arithmetic_v<T>, T> text() const {
    try {
      return detail::to<T>(text());
    } catch (ValidityError& err) {
      err << errinfo_element(std::string(name()))
          << boost::errinfo_at_line(line())
          << boost::errinfo_file_name(filename());
      throw;
    }
  }

  std::optional<Element> child(std::string_view name = "") const {
    for (Element element : children()) {
      if (name.empty() || name == element.name())
        return element;
    }
    return {};
  }

  Range children() const { return Range(element_->children); }

  auto children(std::string_view name) const {
    return children() |
           boost::adaptors::filtered([name](const Element& element) {
             return element.name() == name;
           });
  }

 private:
  xmlNode* to_node() const {
    return reinterpret_cast<xmlNode*>(const_cast<xmlElement*>(element_));
  }

  const xmlElement* element_;  
};

/// The libxml2 parser option flags (xmlParserOption) combined into a bit mask.
const int kParserOptions = XML_PARSE_HUGE |        // Relax parser limits.
                           XML_PARSE_COMPACT |     // Compact small text nodes.
                           XML_PARSE_NONET |       // Forbid network access.
                           XML_PARSE_XINCLUDE |    // XInclude substitution.
                           XML_PARSE_NOXINCNODE |  // No XInclude marker nodes.
                           XML_PARSE_NOBASEFIX;    // No xml:base URI fix-up.

class Validator;  // Declared early; Document's constructor accepts a pointer.

/// Owner of a libxml2 DOM document.
/// The document is released with xmlFreeDoc via the smart pointer deleter type.
class Document {
 public:
  /// Defined out of line.
  ///
  /// @param[in] file_path  The path to the XML file.
  /// @param[in] validator  An optional validator; null by default.
  ///                       NOTE(review): presumably applied to the parsed
  ///                       document -- confirm against the definition.
  explicit Document(const std::string& file_path,
                    Validator* validator = nullptr);

  /// @returns The adaptor for the root element reported by libxml2.
  ///
  /// @pre The document has a root element (Element rejects null nodes).
  Element root() const {
    const xmlNode* root_node = xmlDocGetRootElement(doc_.get());
    return Element(reinterpret_cast<const xmlElement*>(root_node));
  }

  /// @{
  /// @returns The underlying libxml2 document (not owned by the caller).
  const xmlDoc* get() const { return doc_.get(); }
  xmlDoc* get() { return doc_.get(); }
  /// @}

 private:
  std::unique_ptr<xmlDoc, decltype(&xmlFreeDoc)> doc_;  ///< The DOM document.
};

class Validator {
 public:
  explicit Validator(const std::string& rng_file);

  void validate(const Document& doc) {
    xmlResetLastError();
    int ret = xmlRelaxNGValidateDoc(valid_ctxt_.get(),
                                    const_cast<xmlDoc*>(doc.get()));
    if (ret != 0)
      SCRAM_THROW(detail::GetError<ValidityError>());
  }

 private:
  std::unique_ptr<xmlRelaxNG, decltype(&xmlRelaxNGFree)> schema_;
  std::unique_ptr<xmlRelaxNGValidCtxt, decltype(&xmlRelaxNGFreeValidCtxt)>
      valid_ctxt_;
};

}  // namespace scram::xml

Updated on 2025-11-11 at 16:51:09 +0000