Repository: gluc/data.tree Branch: master Commit: 849d95ef9fca Files: 114 Total size: 435.0 KB Directory structure: gitextract_u1u_requ/ ├── .Rbuildignore ├── .gitattributes ├── .github/ │ ├── .gitignore │ └── workflows/ │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── .gitignore ├── .travis.yml ├── CRAN-SUBMISSION ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R/ │ ├── data.tree-package.R │ ├── data_doc.R │ ├── node.R │ ├── node_actives.R │ ├── node_conversion.R │ ├── node_conversion_ape.R │ ├── node_conversion_dataframe.R │ ├── node_conversion_dendrogram.R │ ├── node_conversion_igraph.R │ ├── node_conversion_list.R │ ├── node_conversion_party.R │ ├── node_conversion_rpart.R │ ├── node_methods.R │ ├── node_methods_sideeffect.R │ ├── node_methods_traversal.R │ ├── node_plot.R │ ├── register-s3.R │ ├── release.R │ ├── util.R │ └── zzz.R ├── README.md ├── appveyor.yml ├── cran-comments.md ├── data/ │ ├── acme.rda │ └── mushroom.rda ├── data.tree.Rproj ├── data_gen/ │ ├── acme.R │ └── mushroom.R ├── getting-started-with-development.md ├── inst/ │ └── extdata/ │ ├── flare.json │ ├── jennylind.yaml │ ├── portfolio.csv │ └── useR15.csv ├── man/ │ ├── Aggregate.Rd │ ├── AreNamesUnique.Rd │ ├── CheckNameReservedWord.Rd │ ├── Climb.Rd │ ├── Clone.Rd │ ├── CreateRandomTree.Rd │ ├── CreateRegularTree.Rd │ ├── Cumulate.Rd │ ├── DefaultPlotHeight.Rd │ ├── Distance.Rd │ ├── Do.Rd │ ├── FindNode.Rd │ ├── FormatFixedDecimal.Rd │ ├── FormatPercent.Rd │ ├── Get.Rd │ ├── GetAttribute.Rd │ ├── GetPhyloNr.Rd │ ├── NODE_RESERVED_NAMES_CONST.Rd │ ├── Navigate.Rd │ ├── Node.Rd │ ├── Prune.Rd │ ├── Revert.Rd │ ├── Set.Rd │ ├── SetFormat.Rd │ ├── Sort.Rd │ ├── ToDiagrammeRGraph.Rd │ ├── ToNewick.Rd │ ├── Traverse.Rd │ ├── acme.Rd │ ├── as.Node.BinaryTree.Rd │ ├── as.Node.Rd │ ├── as.Node.data.frame.Rd │ ├── as.Node.dendrogram.Rd │ ├── as.Node.list.Rd │ ├── as.Node.party.Rd │ ├── as.Node.phylo.Rd │ ├── as.Node.rpart.Rd │ ├── as.data.frame.Node.Rd │ ├── as.dendrogram.Node.Rd │ ├── 
as.igraph.Node.Rd │ ├── as.list.Node.Rd │ ├── as.phylo.Node.Rd │ ├── averageBranchingFactor.Rd │ ├── data.tree.Rd │ ├── isLeaf.Rd │ ├── isNotLeaf.Rd │ ├── isNotRoot.Rd │ ├── isRoot.Rd │ ├── mushroom.Rd │ ├── print.Node.Rd │ └── s3_register.Rd ├── publish-cheat-sheet.md ├── tests/ │ ├── testthat/ │ │ ├── test-draw.R │ │ ├── test-treeConstruction.R │ │ ├── test-treeConversionApe.R │ │ ├── test-treeConversionDataFrame.R │ │ ├── test-treeConversionDendrogram.R │ │ ├── test-treeConversionList.R │ │ ├── test-treeConversionParty.R │ │ ├── test-treeConversionRpart.R │ │ ├── test-treeConversionigraph.R │ │ ├── test-treeDocu.R │ │ ├── test-treeMethods.R │ │ ├── test-treeMethodsSideEffect.R │ │ └── test-util.R │ └── testthat.R └── vignettes/ ├── applications.Rmd ├── applications.banner.html ├── data.tree.Rmd └── intro.banner.html ================================================ FILE CONTENTS ================================================ ================================================ FILE: .Rbuildignore ================================================ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.travis\.yml$ data_gen cran-comments.md publish-cheat-sheet.md getting-started-with-development.md appveyor.yml travis-tool.sh.cmd Rprof.out ^appveyor\.yml$ revdep README.md ^CRAN-RELEASE$ ^\.github$ ^CRAN-SUBMISSION$ ================================================ FILE: .gitattributes ================================================ * text=auto data/* binary src/* text=lf R/* text=lf ================================================ FILE: .github/.gitignore ================================================ *.html ================================================ FILE: .github/workflows/R-CMD-check.yaml ================================================ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main, master] pull_request: branches: [main, master] name: R-CMD-check jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} name: ${{ matrix.config.os }} (${{ matrix.config.r }}) strategy: fail-fast: false matrix: config: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck needs: check - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true ================================================ FILE: .github/workflows/test-coverage.yaml ================================================ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main, master] pull_request: branches: [main, master] name: test-coverage jobs: test-coverage: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v3 - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::covr needs: coverage - name: Test coverage run: | covr::codecov( quiet = FALSE, clean = FALSE, install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) shell: Rscript {0} - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() uses: actions/upload-artifact@v3 with: name: coverage-test-failures path: ${{ runner.temp }}/package ================================================ FILE: .gitignore ================================================ .Rproj.user .Rhistory .RData inst/doc private Rprof.out revdep ================================================ FILE: .travis.yml ================================================ language: r cache: packages r: - oldrel - release - devel sudo: false warnings_are_errors: true r_packages: - rmarkdown - covr r_check_args: --as-cran after_success: - Rscript -e 'library(covr);codecov()' notifications: email: on_success: change on_failure: change ================================================ FILE: CRAN-SUBMISSION ================================================ Version: 1.1.0 Date: 2023-11-12 11:25:34 UTC SHA: d9ceaf8a4343f32483a5856ff530f4a31a6b40fa ================================================ FILE: DESCRIPTION ================================================ Package: data.tree Type: Package Title: General Purpose Hierarchical Data Structure Version: 1.1.0 Date: 2023-11-11 Authors@R: c( 
person( "Russ", "Hyde", role = c("ctb"), comment = "improve dependencies" ), person( "Chris", "Hammill", role = c("ctb"), comment = "improve getting" ), person( "Facundo", "Munoz", role = c("ctb"), comment = "improve list conversion" ), person( "Markus", "Wamser" , role = c("ctb") , comment = "fixed some typos" ), person( "Pierre", "Formont" , role = c("ctb") , comment = "additional features" ), person( "Kent", "Russel" , role = c("ctb") , comment = "documentation" ), person( "Noam", "Ross" , role = c("ctb") , comment = "fixes" ), person( "Duncan", "Garmonsway" , role = c("ctb") , comment = "fixes" ), person( "Christoph", "Glur" , role = c("aut", "cre") , comment = "R interface" , email = "christoph.glur@powerpartners.pro" ) ) VignetteBuilder: knitr, rmarkdown Imports: R6, stringi, methods Suggests: Formula, graphics, testthat, knitr, rmarkdown, ape, yaml, networkD3, jsonlite, treemap, party, partykit, doParallel, foreach, htmlwidgets, DiagrammeR (>= 1.0.0), mockery, rpart Enhances: igraph Description: Create tree structures from hierarchical data, and traverse the tree in various orders. Aggregate, cumulate, print, plot, convert to and from data.frame and more. Useful for decision trees, machine learning, finance, conversion from and to JSON, and many other applications. 
License: GPL (>= 2) URL: https://github.com/gluc/data.tree BugReports: https://github.com/gluc/data.tree/issues Depends: R (>= 3.5) RoxygenNote: 7.2.3 Encoding: UTF-8 ================================================ FILE: NAMESPACE ================================================ # Generated by roxygen2: do not edit by hand S3method(as.Node,BinaryTree) S3method(as.Node,data.frame) S3method(as.Node,dendrogram) S3method(as.Node,list) S3method(as.Node,party) S3method(as.Node,phylo) S3method(as.Node,rpart) S3method(as.data.frame,Node) S3method(as.dendrogram,Node) S3method(as.list,Node) S3method(plot,Node) S3method(print,Node) export(.parentSeparator) export(.separator) export(Aggregate) export(AreNamesUnique) export(Climb) export(Clone) export(CreateRandomTree) export(CreateRegularTree) export(Cumulate) export(DefaultPlotHeight) export(Distance) export(Do) export(FindNode) export(FormatFixedDecimal) export(FormatPercent) export(FromDataFrameNetwork) export(FromDataFrameTable) export(FromListExplicit) export(FromListSimple) export(Get) export(GetAttribute) export(GetDefaultTooltip) export(GetPhyloNr) export(NODE_RESERVED_NAMES_CONST) export(Navigate) export(Node) export(Prune) export(Revert) export(Set) export(SetEdgeStyle) export(SetFormat) export(SetGraphStyle) export(SetNodeStyle) export(Sort) export(ToDataFrameNetwork) export(ToDataFrameTable) export(ToDataFrameTree) export(ToDataFrameTypeCol) export(ToDiagrammeRGraph) export(ToListExplicit) export(ToListSimple) export(ToNewick) export(Traverse) export(as.Node) export(as.igraph.Node) export(as.phylo.Node) export(averageBranchingFactor) export(isLeaf) export(isNotLeaf) export(isNotRoot) export(isRoot) import(methods) import(stats) import(stringi) importFrom(R6,R6Class) ================================================ FILE: NEWS ================================================ # All changes to data.tree are documented here. ## Version 1.1.0 - IMPROVE: Node names may no longer be `NA`. 
It was not fully supported, and now it is an error. (#152) - FIX: changed lock_object to lock_objects (#149 thx to Olly Beagly) - FIX: help('data.tree') works again - IMPROVE: FromListSimple and as.Node.list now have an additional parameter 'interpretNullAsList'. See #169 for details. - IMPROVE: added parameter `row.names = FALSE` in `print.Node()` to hide row numbers when printing a data.tree. ## Version 1.0.0 - IMPROVE: Replaced dependency on stringr by dependency on stringi, which makes data.tree even more light-weight - CHANGE: Node serialization changed. In many cases, you might still be able to load previously saved data.tree objects, but then they do not correspond to the latest version. - CHANGE: New reserved words for Node: attributes and attributesAll - NOTE: Node$fields and Node$fieldsAll will be deprecated in the next version. Use Node$attributes and Node$attributesAll instead - REMOVE: Deprecated Node$FindNode has been removed (use FindNode(node, ...) instead) - IMPROVE: Node is now fully documented, (thx to roxygen2 for supporting R6) - CHANGE: data.tree now depends on R 3.5 - FIX: adjusted sample data that didn't support correct handling of active bindings - IMPROVE: Adding reserved word check to FromDataFrameNetwork (#147 thx to wkumler) ## Version 0.7.11 - IMPROVE: DiagrammeR is now only suggested, so data.tree is much more lightweight if plotting is not needed (#143 thx to Russ Hyde) - FIX: plot now also works with quotations in names (#137 thx to thotal) - IMPROVE: as.list and other conversions to list now contain pruneFun argument (#142) ## Version 0.7.10 - IMPROVE: various spelling errors fixed in vignettes ## Version 0.7.9 - FIX: Bug in as.data.frame.Node: NA for certain list slots (#135) - FIX: Get prints Null as NA by default (#128) ## Version 0.7.8 - FIX: wrong export of S3 ## Version 0.7.7 - IMPROVE: if an attribute of a node has n dimensions, then Get will return an array of dim n+1, with the names of the first dimension being equal to the
node names (thanks to Chris Hammill) - FIX: fieldsAll now doesn't simplify (thanks to Vaclav Slimacek) ## Version 0.7.6 - IMPROVE: plot now also works for trees with a single root node (thx to Pierre Neuvial) - IMPROVE: ellipsis parameters are not passed to DiagrammeR for plotting (#109) - FIX: Dependency on DiagrammeR (>= 1.0.0) explicitly stated (#111) - IMPROVE: Allow tibble in FromDataFrameNetwork and FromDataFrameTable (#115) ## Version 0.7.5 - IMPROVE: namesNotUnique parameter in as.Node.phylo (#106) - FIX: fixed incompatibility issue with DiagrammeR (#110) ## Version 0.7.4 - IMPROVE: print now has an explicit arg pruneFun - FIX: partykit tests now pass - FIX: DESCRIPTION now in line with latest changes from CRAN (rmarkdown declared) - FIX: Skipped tests in testMethods included ## Version 0.7.3 - FIX: Various typos in documentation ## Version 0.7.2 Upgrade to R 3.4.x and newest package versions. - FIX: plot: global graph attributes now work (#88) - FIX: typo in vignette data.tree vignette('applications', package = "data.tree") - FIX: warnings in as.dendrogram - FIX: warnings in sample code for Do - FIX: as.data.frame created warnings for values that were of length 0 ## Version 0.7.0 - ADD: ToDiagrammeRGraph to convert to a DiagrammeR graph object - REMOVE: ToGraphViz (replaced with ToDiagrammeRGraph, to support the latest features in the DiagrammeR package). You can still get the dot representation by using DiagrammeR::generate_dot(ToDiagrammeRGraph(node)) - CHANGE: plot.Node's last parameter is now 'graph', and not 'engine' anymore.
- FIX: minor typos fixed ## Version 0.6.2 - ADD: new Distance function to measure distance from one Node to another in the same tree ## Version 0.6.1 - IMPROVE: FromListSimple now accepts subclasses of lists (#79) - IMPROVE: FromDataFrameTable now supports tibbles (#89) - IMPROVE: print.Node and as.data.frame.Node now also work for node fields with length > 1 (#81) - FIX: print.Node and as.data.frame.Node now also work if some Nodes have the same name as some fields (#82) - REMOVE: node$FindNode and node$Navigate are now deprecated. Use FindNode(node, ...) and Navigate(node, ...) instead - REMOVE: node$Sort, node$Prune, and node$Revert are now deprecated. Use Sort(node, ...), Prune(node, ...) and Revert(node, ...) instead ## Version 0.5.0 - IMPROVE: Performance improvement for many functions. For example, as.Node is faster roughly by a factor of 4 for large datasets (#74) - CHANGE: by default, as.data.frame.Node (and derivatives) do not format anymore (use the format parameter if you want to format) - IMPROVE: Allow the possibility to keep only some fields when converting to list using as.list.Node (#76) - FromDataFrameTable (#77) - FIX: now also works if there is only the pathString column - IMPROVE: pathString can now also be a factor (or any other type convertible to character) ## Version 0.4.0 - IMPROVE: as.Node.data.frame and FromDataFrameTable now support paths containing reserved words (#65) - CHANGE: Node$new now checks that names are not reserved names. As a consequence, many conversions to Node now contain a check parameter. - IMPROVE: Climb is now much faster when climbing by name (#71) - IMPROVE: As a result of #71, many other functions are much faster, e.g.
FromDataFrameTable (#72) ## Version 0.3.7 - ADD: Traverse can now also take a custom function as a traversal argument - ADD: Navigate method - ADD: as.Node.BinaryTree Convert SplittingNode from party package to data.tree (#6) - ADD: as.Node.party Convert party class from partykit package to data.tree (#6) ## Version 0.3.6 - FIX: GetDefaultTooltip now also works for attributes which are functions - FIX: GetAttribute now returns attributes with length 0 (e.g. an empty list) - ADD: Sort, Revert and Prune are now also available in traditional format (e.g. Prune(node, pruneFun)) - FIX: FromListSimple: Empty lists now become empty nodes (#59) - IMPROVE: FromListSimple: Unnamed list elements are now also converted (#61) - IMPROVE: documentation of Aggregate - IMPROVE: Check type when setting Node$parent and Node$children (#63) ## Version 0.3.5 - FIX: minor correction in documentation ## Version 0.3.4 - FIX: minor correction in documentation ## Version 0.3.3 - CHANGE: Renamed Find method to FindNode, in order to avoid masking from base - FIX: upgrade to latest version of treemap package - FIX: a few typos in documentation ## Version 0.3.2 - ADD: FromListExplicit now interprets character vectors as a list of nodes (#58) ## Version 0.3.1 - IMPROVE: as.list.Node - now generates auto name if unique name is not available (#54) - now has warn arg, warning if source data contains reserved names - now also imports fields with names equal to reserved names (e.g. count), they will be renamed (to e.g. count2) - CHANGE: node$leaves now returns a list even when called on a leaf itself - ADD: Find method to find a single Node in a (sub-)tree (#52) ## Version 0.3.0 Pine Tree - REMOVE: Removed the cacheAttribute parameter from Aggregate and Cumulate (they were confusing, even to me. Use Do instead to manually store aggregate values in the tree) - ADD: plot function (see ?plot.Node) - ADD: ToDataFrameTypeCol to export e.g.
the path to columns by level in columns: ToDataFrameTypeCol(acme) - ADD: Node$AddSibling - ADD: Node$RemoveAttribute now contains a mandatory parameter so that it can be used if the node does not have the attribute to be removed. - ADD: Get works on methods without args - IMPROVE: FormatFixedDecimal and FormatPercent work for NULL values - IMPROVE: Documentation - FIX: Aggregate will not return attribute from callee anymore, but *always* aggregate children attributes - FIX: Removed ... parameter from ToListExplicit and ToListSimple - FIX: Clone was adding empty children list, which caused a series of problems (#44) - FIX: Cloning a subtree does not keep reference to un-cloned parent anymore (#49) - FIX: print with limit parameter ignored formatter (#43) - FIX: cannot rename to int, e.g. acme$Do(function(x) x$name <- x$position) (#53) ## Version 0.2.4 - FIX: applications vignette, changed from http://htmlwidgets.org to http://www.htmlwidgets.org, as requested by CRAN ## Version 0.2.3 - FIX #33: applications vignette doesn't build because of DiagrammeR update - FIX #32: Cannot subclass Node - FIX #30: strange errors when using data.tree multiple times ## Version 0.2.2 - FIX: Get can now fetch vectors and matrices too - ADD: Node$siblings ## Version 0.2.0-rc.1 Elder - ADD: ClimbByAttribute - FIX: Aggregate and Cumulate now work always on attributes having a formatter - ADD: as.igraph now has a 'directed' parameter - ADD: print now has a pruneMethod, allowing different methods to avoid that a huge tree is printed to the console - REMOVE: FromDataFrameTaxonomy and ToDataFrameTaxonomy (replaced by FromDataFrameNetwork and ToDataFrameNetwork, but with some differences) - ADD: FromDataFrameNetwork and ToDataFrameNetwork - IMPROVE: make Traversal "level" much faster - ADD: Node$RemoveChild - ADD: Node$RemoveAttribute - ADD: as.igraph.Node now supports different directions (climb and descend) ## Version 0.1.9 Pine II - Set correct version number in DESCRIPTION file ## 
Version 0.1.8 Pine - Node - CHANGE: Node$depth is now called Node$height, as the old naming was confusing for many, because in CS, the Node$level is sometimes called depth - Utils - CHANGE: Renamed CreateDummyTree to CreateRegularTree - CHANGE: Height renamed to DefaultPlotHeight, so as to avoid confusion with Node$height - ADD: CreateRandomTree to test trees - ADD: trees can now be climbed directly, e.g. acme$IT$`Go agile` - ADD: print.Node with limit parameter is now much faster - ADD: Clone is now much faster ## Version 0.1.7 Chestnut - General - ADD: demo portfolio - ADD: demo decisiontree - ADD: demo population / treemap - Node - CHANGE: Node$level is now 1-based (used to be: 0-based), i.e. if Node$isRoot then Node$level = 1 - CHANGE: Node$Find is now called Node$Climb to avoid confusion with base::Find - ADD: print.Node contains a limit parameter, allowing you to limit the max number of Nodes to be printed - ADD: Clone (returning a deep copy) - ADD: Prune (pruning the tree) - ADD: SetFormat (support for setting formatter functions on a Node) - ADD: Traverse, standalone traverse method that can be used for piping and whenever you need to apply multiple Get/Set/Do on the same traversal - ADD: Node$isBinary active - ADD: standalone versions of isLeaf, isNotLeaf, isRoot, isNotRoot for concise filtering - ADD: AreNamesUnique to test if names of the nodes are unique throughout the tree (and not only among siblings) - FIX: node$position now returns 1 for root - ADD: Aggregate function now supports functions - ADD: node$averageBranchingFactor - CHANGE: Aggregate function does not cache anymore by default. See cacheAttribute for details.
- Node$Get: - CHANGE: Renamed filterFun parameter to pruneFun - ADD: new parameter filterFun, as opposed to pruneFun - CHANGE: removed the assign parameter (use Do instead) - ADD: new traversal modes "in-order", "level" - ADD: parameter inheritFromAncestors - Node$Set: - ADD: filterFun and pruneFun - ADD: support for traversal order - Node$Do: - ADD: new function Do, which applies a function to Nodes - Conversions - ADD: conversion to and from list of lists (and thus to and from yaml, json, etc.) - ADD: conversion from data.frame - ADD: conversion to and from dendrogram - ADD: conversion to and from phylo from the ape package - ADD: conversion to Newick notation - ADD: conversion ToDataFrameTable (returning leafs only) - ADD: conversion ToDataFrameTree - ADD: conversion ToDataFrameTaxonomy - ADD: conversion to igraph - Utils - CHANGE: Renamed PrintFixedDecimal to FormatFixedDecimal to achieve better consistency - ADD: CreateDummyTree to test large trees - ADD: CreateRandomTree to test trees ================================================ FILE: R/data.tree-package.R ================================================ #' data.tree: Hierarchical Data Structures #' #' \code{data.tree} is to hierarchical data what \code{data.frame} is to tabular data: An extensible, general purpose structure to store, manipulate, #' and display hierarchical data. #' #' @section Introduction: #' #' Hierarchical data is ubiquitous in statistics and programming (XML, search trees, family trees, classification, file system, etc.). However, no general-use \bold{tree data structure} is available in R. #' Where tabular data has \code{data.frame}, hierarchical data is often modeled in lists of lists or similar makeshifts. These #' structures are often difficult to manage. #' This is where the \code{data.tree} package steps in. It lets you build trees of hierarchical #' data for various uses: to print, to rapid prototype search algorithms, to test out new classification algorithms, and much more. 
#' #' @section Tree Traversal: #' #' \code{data.tree} allows you to \code{\link{Traverse}} trees in various orders (pre-order, post-order, level, etc.), and it lets you run operations on \code{\link{Node}s} via #' \code{\link{Do}}. #' Similarly, you can collect and store data while traversing a tree using the \code{\link{Get}} and the \code{\link{Set}} methods. #' #' @section Methods: #' #' The package also contains utility functions to \code{\link{Sort}}, to \code{\link{Prune}}, to \code{\link{Aggregate}} and \code{\link{Cumulate}} #' and to \code{\link{print}} in custom formats. #' #' #' @section Construction and Conversion: #' #' The package also contains many conversions from and to data.tree structures. Check out the see also section of \code{\link{as.Node}}. #' #' You can construct a tree from a \code{data.frame} using \code{\link{as.Node.data.frame}}, and convert it back using \code{\link{as.data.frame.Node}}. #' Similar options exist for list of lists. #' For more specialized conversions, see \code{\link{as.dendrogram.Node}}, \code{\link{as.Node.dendrogram}}, #' \code{\link{as.phylo.Node}} and \code{\link{as.Node.phylo}} #' #' Finally, easy conversion options from and to list, dataframe, JSON, YAML, igraph, ape, rpart, party and more exist: #' #' \itemize{ #' \item{list: both directions} #' \item{dataframe: both directions} #' \item{JSON, YAML: both directions, via lists} #' \item{igraph: from data.tree to igraph} #' \item{ape: both directions} #' \item{rpart: from rpart to data.tree} #' \item{party: from party to data.tree} #' } #' #' @section Node and Reference Semantics: #' #' The entry point to the package is \code{\link{Node}}. Each tree is composed of a number of \code{Node}s, referencing each other. #' #' One of the most important things to note about \code{data.tree} is that it exhibits \bold{reference semantics}. In a nutshell, this means that you can modify #' your tree along the way, without having to reassign it to a variable after each modification.
By and large, this is a rather exceptional behavior #' in R, where value-semantics is king most of the time. #' #' @section Applications: #' #' \code{data.tree} is not optimised for computational speed, but for implementation speed. Namely, its memory #' footprint is relatively large compared to traditional R data structures. However, it can easily handle trees with #' several thousand nodes, and once a tree is constructed, operations on it are relatively fast. #' data.tree is always useful when #' \itemize{ #' \item{you want to develop and test a new algorithm} #' \item{you want to import and convert tree structures (it imports and exports to list-of-list, data.frame, yaml, json, igraph, dendrogram, phylo and more)} #' \item{you want to play around with data, display it and get an understanding} #' \item{you want to test another package, to compare it with your own results} #' \item{you need to do homework} #' } #' #' For a quick overview of the features, read the \code{\link{data.tree}} vignette by running \code{vignette("data.tree")}. 
For stylized #' applications, see \code{vignette("applications", package='data.tree')} #' #' @examples #' data(acme) #' print(acme) #' acme$attributesAll #' acme$count #' acme$totalCount #' acme$isRoot #' acme$height #' print(acme, "p", "cost") #' #' outsource <- acme$IT$Outsource #' class(outsource) #' print(outsource) #' outsource$attributes #' outsource$isLeaf #' outsource$level #' outsource$path #' outsource$p #' outsource$parent$name #' outsource$root$name #' outsource$expCost <- outsource$p * outsource$cost #' print(acme, "expCost") #' #' acme$Get("p") #' acme$Do(function(x) x$expCost <- x$p * x$cost) #' acme$Get("expCost", filterFun = isLeaf) #' #' ToDataFrameTable(acme, "name", "p", "cost", "level", "pathString") #' ToDataFrameTree(acme, "name", "p", "cost", "level") #' ToDataFrameNetwork(acme, "p", "cost") #' #' #' @seealso \code{\link{Node}} #' @seealso For more details, see the \code{data.tree} vignette by running: \code{vignette("data.tree")} #' @name data.tree #' @keywords internal "_PACKAGE" NULL ================================================ FILE: R/data_doc.R ================================================ #' Sample Data: A Simple Company with Departments #' #' acme's tree representation is accessed through its root, acme. #' #' \itemize{ #' \item cost, only available for leaf nodes. Cost of the project. #' \item p probability that a project will be undertaken. #' } #' #' @docType data #' @keywords datasets #' @name acme #' @usage data(acme) #' @format A data.tree root Node NULL #' Sample Data: Data Used by the ID3 Vignette #' #' mushroom contains attributes of mushrooms. We can use this data to predict a #' mushroom's toxicity based on its attributes. 
#' The attributes available in the data set are: #' #' \itemize{ #' \item color the color of a mushroom #' \item size whether a mushroom is small or large #' \item points whether a mushroom has points #' \item edibility whether a mushroom is edible or toxic #' } #' #' @docType data #' @keywords datasets #' @name mushroom #' @usage data(mushroom) #' @format data.frame NULL ================================================ FILE: R/node.R ================================================ #' Names that are reserved by the Node class. #' #' These are reserved by the Node class, you cannot use these as #' attribute names. #' Note also that all attributes starting with a . are reserved. #' #' @export NODE_RESERVED_NAMES_CONST <- c( 'AddChild', 'AddChildNode', 'AddSibling', 'AddSiblingNode', 'attributes', 'attributesAll', 'averageBranchingFactor', 'children', 'Climb', 'Navigate', 'FindNode', 'clone', 'count', 'Do', 'fields', 'fieldsAll', 'Get', 'GetAttribute', 'height', 'initialize', 'isBinary', 'isLeaf', 'isRoot', 'leafCount', 'leaves', 'level', 'levelName', 'name', 'parent', 'path', 'pathString', 'position', 'printFormatters', 'Prune', 'Revert', 'RemoveAttribute', 'RemoveChild', 'root', 'Set', 'siblings', 'Sort', 'totalCount', '.*') #' Create a \code{data.tree} Structure With \code{Nodes} #' #' @description \code{Node} is at the very heart of the \code{data.tree} package. All trees are constructed #' by tying together \code{Node} objects. #' #' @details Assemble \code{Node} objects into a \code{data.tree} #' structure and use the traversal methods to set, get, and perform operations on it. Typically, you construct larger tree #' structures by converting from \code{data.frame}, \code{list}, or other formats. #' #' Most methods (e.g. \code{node$Sort()}) also have a functional form (e.g. 
\code{Sort(node)}) #' #' @docType class #' @importFrom R6 R6Class #' #' #' @usage # n1 <- Node$new("Node 1") #' #' @examples #' library(data.tree) #' acme <- Node$new("Acme Inc.") #' accounting <- acme$AddChild("Accounting")$ #' AddSibling("Research")$ #' AddChild("New Labs")$ #' parent$ #' AddSibling("IT")$ #' AddChild("Outsource") #' print(acme) #' #' #' @param name the name of the node to be created #' @param check Either #' \itemize{ #' \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} #' \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} #' \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} #' } #' @param ... A name-value mapping of node attributes #' @param attribute determines what is collected. The \code{attribute} can be #' \itemize{ #' \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} #' \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} #' \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} #' } #' #' @param recursive if \code{TRUE}, the method will be called recursively on the \code{Node}'s children. This allows sorting an entire tree. #' @param traversal defines the traversal order to be used. 
This can be #' \describe{ #' \item{pre-order}{Go to first child, then to its first child, etc.} #' \item{post-order}{Go to the first branch's leaf, then to its siblings, and work your way back to the root} #' \item{in-order}{Go to the first branch's leaf, then to its parent, and only then to the leaf's sibling} #' \item{level}{Collect root, then level 2, then level 3, etc.} #' \item{ancestor}{Take a node, then the node's parent, then that node's parent in turn, etc. This ignores the \code{pruneFun} } #' \item{function}{You can also provide a function, whose sole parameter is a \code{\link{Node}} object. The function is expected to return the node's next node, a list of the node's next nodes, or NULL.} #' } #' Read the data.tree vignette for a detailed explanation of these traversal orders. #' #' #' @param pruneFun allows providing a prune criterion, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. #' If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered. #' #' @param filterFun allows providing a filter, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. #' Note that if filter returns \code{FALSE}, then the node will be excluded from the result (but not the entire subtree). #' #' #' @seealso For more details see the \code{\link{data.tree}} documentation, or the \code{data.tree} vignette: \code{vignette("data.tree")} #' #' #' @export #' @format An \code{\link{R6Class}} generator object Node <- R6Class("Node", lock_objects = FALSE, lock_class = TRUE, portable = TRUE, class = TRUE, cloneable = TRUE, public = list( #' @description Create a new \code{Node} object. This is often used to create the root of a tree when creating a tree programmatically. #' #' @examples #' node <- Node$new("mynode", x = 2, y = "value of y") #' node$y #' #' @return A new `Node` object initialize=function(name, check = c("check", "no-warn", "no-check"), ...)
{ if (!missing(name)) { name <- as.character(name) if (length(name) != 1) { stop("Node name must be a scalar") } else if (is.na(name)) { stop("Node name must be a non-NA character scalar") } name <- CheckNameReservedWord(name, check) private$p_name <- name } if (!missing(...)) { args <- list(...) mapply(FUN = function(arg, nme) self[[nme]] <- arg, args, names(args)) } invisible (self) }, #################### # Tree creation #' @description Creates a \code{Node} and adds it as the last sibling as a child to the \code{Node} on which this is called. #' #' @examples #' root <- Node$new("myroot", myname = "I'm the root") #' root$AddChild("child1", myname = "I'm the favorite child") #' child2 <- root$AddChild("child2", myname = "I'm just another child") #' child3 <- child2$AddChild("child3", myname = "Grandson of a root!") #' print(root, "myname") #' #' @return The new \code{Node} (invisibly) AddChild = function(name, check = c("check", "no-warn", "no-check"), ...) { child <- Node$new(as.character(name), check, ...) invisible (self$AddChildNode(child)) }, #' @description Adds a \code{Node} as a child to this node. #' #' @param child The child \code{"Node"} to add. #' #' @examples #' root <- Node$new("myroot") #' child <- Node$new("mychild") #' root$AddChildNode(child) #' #' @return the child node added (this lets you chain calls) AddChildNode = function(child) { private$p_children[[child$name]] <- child self[[child$name]] <- child child$parent <- self invisible (child) }, #' @description Creates a new \code{Node} called \code{name} and adds it after this \code{Node} as a sibling. #' #' @examples #' #' root <- Node$new("myroot") #' child <- root$AddChild("child1") #' sibling <- child$AddSibling("sibling1") #' #' @return the sibling node (this lets you chain calls) #' AddSibling = function(name, check = c("check", "no-warn", "no-check"), ...) { sibling <- Node$new(as.character(name), check, ...) 
invisible (self$AddSiblingNode(sibling)) }, #' @description Adds a \code{Node} after this \code{Node}, as a sibling. #' #' @param sibling The \code{"Node"} to add as a sibling. #' #' @examples #' root <- Node$new("myroot") #' child <- Node$new("mychild") #' sibling <- Node$new("sibling") #' root$AddChildNode(child)$AddSiblingNode(sibling) #' #' @return the added sibling node (this lets you chain calls, as in the examples) #' AddSiblingNode = function(sibling) { if(isRoot(self)) stop("Cannot insert sibling to root!") private$p_parent[[sibling$name]] <- sibling private$p_parent$children <- append(private$p_parent$children, sibling, after = self$position) names(private$p_parent$children)[self$position + 1] <- sibling$name sibling$parent <- private$p_parent invisible (sibling) }, #' @description Remove the child \code{Node} called \code{name} from a \code{Node} and returns it. #' #' @examples #' node <- Node$new("myroot")$AddChild("mychild")$root #' node$RemoveChild("mychild") #' #' @return the subtree spanned by the removed child. RemoveChild = function(name) { if (!name %in% names(private$p_children)) stop(paste0("Node ", self$name, " does not contain child ", name)) child <- private$p_children[[name]] self$RemoveAttribute(name) private$p_children <- private$p_children[-child$position] child$parent <- NULL return (child) }, #' @description Removes attribute called \code{name} from this \code{Node}. #' #' @param stopIfNotAvailable Gives an error if \code{stopIfNotAvailable} and the attribute does not exist. 
#' #' @examples #' node <- Node$new("mynode") #' node$RemoveAttribute("age", stopIfNotAvailable = FALSE) #' node$age <- 27 #' node$RemoveAttribute("age") #' node #' RemoveAttribute = function(name, stopIfNotAvailable = TRUE) { attAvailable <- name %in% ls(self) if (stopIfNotAvailable && !attAvailable) stop(paste0("Node ", self$name, " does not contain field ", name)) else if (attAvailable) { rm(list = name, envir = self) return (TRUE) } return (FALSE) }, # End Tree Creation ######################## ######################## ## Side Effects #' @description Sort children of a \code{Node} or an entire \code{data.tree} structure #' #' @details #' You can sort with respect to any argument of the tree. But note that sorting has #' side-effects, meaning that you modify the underlying, original data.tree object structure. #' #' See also \code{\link{Sort}} for the equivalent function. #' #' #' @param ... any parameters to be passed on the the attribute (in case it's a method or a #' function) #' @param decreasing sort order #' #' #' @return Returns the node on which Sort is called, invisibly. This can be useful to chain Node methods. #' #' @examples #' data(acme) #' acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order") #' Sort(acme, "totalCost", decreasing = FALSE) #' print(acme, "totalCost") #' Sort = function(attribute, ..., decreasing = FALSE, recursive = TRUE) { .Deprecated("Sort(node, ...)") Sort(self, attribute, ..., decreasing = decreasing, recursive = recursive) }, #' @description Reverts the sort order of a \code{Node}'s children. #' #' See also \code{\link{Revert}} for the equivalent function. #' #' #' @return returns the Node invisibly (for chaining) #' #' @seealso \code{\link{Node}} #' @seealso \code{\link{Sort}} #' @export Revert = function(recursive = TRUE) { .Deprecated("Revert(node, ...)") Revert(self, recursive) }, #' @description Prunes a tree. #' #' Pruning refers to removing entire subtrees. 
This function has side-effects, it modifies your data.tree structure! #' #' See also \code{\link{Prune}} for the equivalent function. #' #' @param pruneFun allows providing a a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. #' If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered. #' @return the number of nodes removed #' #' @examples #' data(acme) #' acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum)) #' Prune(acme, function(x) x$cost > 700000) #' print(acme, "cost") #' Prune = function(pruneFun) { .Deprecated("Prune(node, ...)") Prune(self, pruneFun = pruneFun) }, # End Side Effects ########################### #' @description Climb a tree from parent to children, by provided criteria. #' #' @details #' This method lets you climb the tree, from crutch to crutch. On each \code{Node}, the #' \code{Climb} finds the first child having attribute value equal to the the provided argument. #' #' See also \code{\link{Climb}} and \code{\link{Navigate}} #' #' Climb(node, ...) #' #' #' @param node The root \code{\link{Node}} of the tree or subtree to climb #' @param ... an attribute-value pairlist to be searched. For brevity, you can also provide a character vector to search for names. #' @return the \code{Node} having path \code{...}, or \code{NULL} if such a path does not exist #' #' @examples #' data(acme) #' #' #the following are all equivalent #' Climb(acme, 'IT', 'Outsource') #' Climb(acme, name = 'IT', name = 'Outsource') #' Climb(acme, 'IT')$Climb('Outsource') #' Navigate(acme, path = "IT/Outsource") #' #' Climb(acme, name = 'IT') #' #' Climb(acme, position = c(2, 1)) #' #or, equivalent: #' Climb(acme, position = 2, position = 1) #' Climb(acme, name = "IT", cost = 250000) #' #' tree <- CreateRegularTree(5, 2) #' tree$Climb(c("1", "1"), position = c(2, 2))$path #' #' Climb = function(...) { Climb(self, ...) 
}, #' @description Navigate to another node by relative path. #' #' #' @param node The starting \code{\link{Node}} to navigate #' @param path A string or a character vector describing the path to navigate #' #' @details The \code{path} is always relative to the \code{Node}. Navigation #' to the parent is defined by \code{..}, whereas navigation to a child #' is defined via the child's name. #' If path is provided as a string, then the navigation steps are separated #' by '/'. #' #' See also \code{\link{Navigate}} and \code{\link{Climb}} #' #' @examples #' data(acme) #' Navigate(acme$Research, "../IT/Outsource") #' Navigate(acme$Research, c("..", "IT", "Outsource")) #' Navigate = function(path) { .Deprecated("Navigate(node, ...)") Navigate(self, path) }, ########################## # Traversal #' @description Traverse a Tree and Collect Values #' #' @details #' The \code{Get} method is one of the most important ones of the \code{data.tree} package. It lets you traverse a tree #' and collect values along the way. Alternatively, you can call a method or a function on each \code{\link{Node}}. #' #' See also \code{\link{Get}}, \code{\link{Node}}, \code{\link{Set}}, \code{\link{Do}}, \code{\link{Traverse}} #' #' #' #' #' @param attribute determines what is collected. The \code{attribute} can be #' \itemize{ #' \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} #' \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} #' \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} #' } #' @param ... in case the \code{attribute} is a function or a method, the ellipsis is passed to it as additional arguments. #' @param format if \code{FALSE} (the default), no formatting is being used. 
If \code{TRUE}, then the first formatter (if any) found along the ancestor path is being used for formatting #' (see \code{\link{SetFormat}}). If \code{format} is a function, then the collected value is passed to that function, and the result is returned. #' @param inheritFromAncestors if \code{TRUE}, then the path above a \code{Node} is searched to get the \code{attribute} in case it is NULL. #' @param simplify same as \code{\link{sapply}}, i.e. TRUE, FALSE or "array". Additionally, you can specify "regular" if #' each returned value is of length > 1, and equally named. See below for an example. #' #' @return a vector containing the \code{atrributes} collected during traversal, in traversal order. \code{NULL} is converted #' to NA, such that \code{length(Node$Get) == Node$totalCount} #' #' #' @examples #' data(acme) #' acme$Get("level") #' acme$Get("totalCount") #' #' #' acme$Get(function(node) node$cost * node$p, #' filterFun = isLeaf) #' #' #This is equivalent: #' nodes <- Traverse(acme, filterFun = isLeaf) #' Get(nodes, function(node) node$cost * node$p) #' #' #' #simplify = "regular" will preserve names #' acme$Get(function(x) c(position = x$position, level = x$level), simplify = "regular") #' Get = function(attribute, ..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL, format = FALSE, inheritFromAncestors = FALSE, simplify = c(TRUE, FALSE, "array", "regular")) { t <- Traverse(self, traversal = traversal, pruneFun = pruneFun, filterFun = filterFun) Get(t, attribute, ..., format = format, inheritFromAncestors = inheritFromAncestors, simplify = simplify) }, #' @description Executes a function on a set of nodes #' #' @details #' See also \code{\link{Node}}, \code{\link{Get}}, \code{\link{Set}}, \code{\link{Traverse}} #' #' @param fun the function to execute. 
The function is expected to be either a Method, or to take a #' Node as its first argument #' #' @examples #' data(acme) #' acme$Do(function(node) node$expectedCost <- node$p * node$cost) #' print(acme, "expectedCost") #' Do = function( fun, ..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL ) { t <- Traverse(self, traversal = traversal, pruneFun = pruneFun, filterFun = filterFun) Do(t, fun, ...) }, #' @description Traverse a Tree and Assign Values #' #' @details #' The method takes one or more vectors as an argument. It traverses the tree, whereby the values are picked #' from the vector. Also available as OO-style method on \code{\link{Node}}. #' #' See also \code{\link{Node}}, \code{\link{Get}}, \code{\link{Do}}, \code{\link{Traverse}} #' #' #' #' @param ... each argument can be a vector of values to be assigned. Recycled. #' #' @return invisibly returns the nodes (useful for chaining) #' #' @examples #' data(acme) #' acme$Set(departmentId = 1:acme$totalCount, openingHours = NULL, traversal = "post-order") #' acme$Set(head = c("Jack Brown", #' "Mona Moneyhead", #' "Dr. Frank N. Stein", #' "Eric Nerdahl" #' ), #' filterFun = function(x) !x$isLeaf #' ) #' print(acme, "departmentId", "head") #' Set = function(..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL) { t <- Traverse(self, traversal = traversal, pruneFun = pruneFun, filterFun = filterFun) Set(t, ...) invisible (self) } # End Traversal ####################### ), active = list( #' @field name Gets or sets the name of a \code{Node}. For example \code{Node$name <- "Acme"}. name = function(value) { if (missing(value)) return (private$p_name) else private$p_name <- changeName(self, private$p_name, value) }, #' @field printFormatters gets or sets the formatters used to print a \code{Node}. #' Set this as a list to a root node. 
#' The different formatters are h (horizontal), v (vertical), l (L), j (junction), and s (separator). #' For example, you can set the formatters to \code{list(h = "\u2500" , v = "\u2502", l = "\u2514", j = "\u251C", s = " ")} #' to get a similar behavior as in \code{fs::dir_tree()}. #' The defaults are: \code{list(h = "--" , v = "\u00A6", l = "\u00B0", j = "\u00A6", s = " ")} printFormatters = function(value) { if (missing(value)) { # if private$p_print_formatters is not set, return default if (is.null(private$p_print_formatters)) { pf <- list(h = "--" , v = "\u00A6", l = "\u00B0", j = "\u00A6", s = " " ) } else { pf <- private$p_print_formatters } return (pf) } private$p_print_formatters <- value }, #' @field parent Gets or sets the parent \code{Node} of a \code{Node}. Only set this if you know what you are doing, as you might mess up the tree structure! parent = function(value) { if (missing(value)) return (private$p_parent) if (!is.null(value) && !is(value, "Node")) stop("Cannot set the parent to a non-Node!") private$p_parent <- value }, #' @field children Gets or sets the children \code{list} of a \code{Node}. Only set this if you know what you are doing, as you might mess up the tree structure! 
children = function(value) { if (missing(value)) return (private$p_children) if (!is.null(value) && !is.list(value)) stop("Cannot set children to non-list!") private$p_children <- value }, #' @field isLeaf Returns \code{TRUE} if the \code{Node} is a leaf, \code{FALSE} otherwise isLeaf = function() { isLeaf(self) }, #' @field isRoot Returns \code{TRUE} if the \code{Node} is the root, \code{FALSE} otherwise isRoot = function() { isRoot(self) }, #' @field count Returns the number of children of a \code{Node} count = function() { return (length(private$p_children)) }, #' @field totalCount Returns the total number of \code{Node}s in the tree totalCount = function() { return (1 + sum(as.numeric(sapply(private$p_children, function(x) x$totalCount, simplify = TRUE, USE.NAMES = FALSE)))) }, #' @field path Returns a vector of mode \code{character} containing the names of the \code{Node}s in the path from the root to this \code{Node} path = function() { c(private$p_parent$path, self$name) }, #' @field pathString Returns a string representing the path to this \code{Node}, separated by backslash pathString = function() { paste(self$path, collapse="/") }, #' @field position The position of a \code{Node} within its siblings position = function() { if (isRoot(self)) return (1) result <- which(names(private$p_parent$children) == self$name) # match(self$name, names(private$p_parent$children)) return (result) }, #' @field fields Will be deprecated, use \code{attributes} instead fields = function() { .Deprecated("Node$attributes", old = "Node$fields") return(self$attributes) }, #' @field fieldsAll Will be deprecated, use \code{attributesAll} instead fieldsAll = function() { .Deprecated("Node$attributesAll", old = "Node$fieldsAll") return(self$attributesAll) }, #' @field attributes The attributes defined on this specific node attributes = function() { nms <- ls(self) nms <- nms[!(nms %in% NODE_RESERVED_NAMES_CONST)] nms <- nms[!(nms %in% names(private$p_children))] nms <- 
nms[!(stri_sub(nms, 1, 1) == '.')] return (nms) }, #' @field attributesAll The distinct union of attributes defined on all the nodes in the tree spanned by this \code{Node} attributesAll = function() { as.vector(na.omit(unique(unlist(Get(Traverse(self), "attributes", simplify = FALSE))))) }, #' @field levelName Returns the name of the \code{Node}, preceded by level times '*'. Useful for printing and not typically called by package users. levelName = function() { paste0(.separator(self), self$name) }, #' @field leaves Returns a list containing all the leaf \code{Node}s leaves = function() { if (self$isLeaf) { return (list(self)) } else { unlist(sapply(private$p_children, function(x) x$leaves)) } }, #' @field leafCount Returns the number of leaves are below a \code{Node} leafCount = function() { length(Traverse(self, filterFun = isLeaf)) }, #' @field level Returns an integer representing the level of a \code{Node}. For example, the root has level 1. level = function() { if (isRoot(self)) { return (1) } else { return (1 + private$p_parent$level) } }, #' @field height Returns max(level) of any of the \code{Nodes} of the tree height = function() { if (isLeaf(self)) return (1) max(Get(Traverse(self, filterFun = function(x) isLeaf(x) && x$position == 1), "level")) - self$level + 1 }, #' @field isBinary Returns \code{TRUE} if all \code{Node}s in the tree (except the leaves) have \code{count = 2} isBinary = function() { all(2 == Get(Traverse(self, filterFun = function(x) !x$isLeaf), "count")) }, #' @field root Returns the root of a \code{Node} in a tree. 
root = function() { if (isRoot(self)) { invisible (self) } else { invisible (private$p_parent$root) } }, #' @field siblings Returns a \code{list} containing all the siblings of this \code{Node} siblings = function() { if (isRoot(self)) { return (list()) } else { private$p_parent$children[names(private$p_parent$children) != self$name] } }, #' @field averageBranchingFactor Returns the average number of crotches below this \code{Node} averageBranchingFactor = function() { averageBranchingFactor(self) } ), private = list( p_name = "", p_children = NULL, p_parent = NULL ) ) ================================================ FILE: R/node_actives.R ================================================ #' Check if a \code{Node} is the root #' #' @param node The Node to test. #' @return TRUE if the Node is the root, FALSE otherwise #' @export isRoot <- function(node) { is.null(node$parent) } #' Check if a \code{Node} is not a root #' #' @param node The Node to test. #' @return FALSE if the Node is the root, TRUE otherwise #' @export isNotRoot <- function(node) { !isRoot(node) } #' Check if a \code{Node} is a leaf #' #' @param node The Node to test. #' @return TRUE if the Node is a leaf, FALSE otherwise #' @export isLeaf <- function(node) { length(node$children) == 0 } #' Check if a \code{Node} is not a leaf #' #' @param node The Node to test. 
#' @return FALSE if the Node is a leaf, TRUE otherwise
#' @export
isNotLeaf <- function(node) {
  !isLeaf(node)
}


# Re-registers a node under a new name in its parent and returns the new name.
#
# For a non-root node, the binding `oldName` is removed from the parent, the
# corresponding entry of the parent's children list is renamed, and the node is
# bound on the parent under `newName`. For a root node nothing is touched.
# The caller is expected to store the returned name itself.
changeName <- function(node, oldName, newName) {
  if (!isRoot(node)) {
    rm(list = oldName, envir = node$parent)
    names(node$parent$children)[node$position] <- newName
    node$parent[[as.character(newName)]] <- node
  }
  return (newName)
}


# Builds the prefix printed in front of a node's name: the prefix contributed by
# all ancestors, followed by this node's own connector ("l" formatter for the
# last child of its parent, "j" otherwise) and the horizontal formatter.
# Returns "" for the root.
#' @export
.separator <- function(self) {
  if (isRoot(self)) return("")
  fmt <- self$root$printFormatters
  connector <- if (self$position == self$parent$count) fmt$l else fmt$j
  mySeparator <- paste0(fmt$s, connector, fmt$h)
  return (paste0(.parentSeparator(self$parent), mySeparator))
}


# Builds the part of a descendant's prefix that is contributed by `self` and its
# ancestors: four separator characters below a last child, otherwise a vertical
# formatter framed by separators. Returns "" for the root.
#' @export
.parentSeparator <- function(self) {
  if (isRoot(self)) return("")
  fmt <- self$root$printFormatters
  if (self$position == self$parent$count) {
    mySeparator <- paste0(rep(fmt$s, 4), collapse = "")
  } else {
    mySeparator <- paste0(fmt$s, fmt$v, fmt$s, fmt$s)
  }
  paste0(.parentSeparator(self$parent), mySeparator)
}



#' Calculate the average number of branches each non-leaf has
#'
#' @param node The node to calculate the average branching factor for
#' @return The mean of \code{count} over all non-leaf nodes of the tree spanned
#' by \code{node}, or 0 if there are no non-leaf nodes.
#' @export
averageBranchingFactor <- function(node) {
  t <- Traverse(node, filterFun = isNotLeaf)
  if (length(t) == 0) return (0)
  cnt <- Get(t, "count")
  # fail with an error instead of dropping into the interactive debugger
  if (!is.numeric(cnt)) {
    stop("Cannot calculate the average branching factor: the collected child counts are not numeric")
  }
  return (mean(cnt))
}

================================================
FILE: R/node_conversion.R
================================================
#' Convert an object to a \code{data.tree} data structure
#'
#' @param x The object to be converted
#' @param ... Additional arguments
#'
#' @family as.Node
#'
#' @export
as.Node <- function(x, ...)
{
  UseMethod("as.Node")
}



#' Write a \code{data.tree} structure to Newick notation
#'
#' To read from Newick, you can use the \code{ape} package, and convert the resulting \code{phylo}
#' object to a \code{data.tree} structure.
#'
#' @param node The root \code{Node} of a tree or sub-tree to be converted
#' @param heightAttribute The attribute (field name, method, or function) storing or calculating the height for each \code{Node}.
#' If it is \code{NULL} (or otherwise of length 0), no edge lengths are written.
#' @param ... parameters that will be passed on to the \code{heightAttribute}, in case it is a function
#'
#' @return A single string in Newick notation, terminated by \code{";"}. Spaces in node names
#' are replaced by underscores, and commas are removed.
#'
#' @import stringi
#'
#' @examples
#' data(acme)
#' ToNewick(acme)
#' ToNewick(acme, heightAttribute = NULL)
#' ToNewick(acme, heightAttribute = function(x) DefaultPlotHeight(x, 200))
#' ToNewick(acme, rootHeight = 200)
#'
#' @family Conversions from Node
#'
#' @keywords Newick
#'
#' @export
ToNewick <- function(node, heightAttribute = DefaultPlotHeight, ...) {

  # Label of a single node: its sanitised name, followed by ":<edge length>"
  # for every non-root node, as long as a height attribute is available.
  nodeLabel <- function(x) {
    label <- stri_replace_all_fixed(x$name, " ", "_")
    label <- stri_replace_all_fixed(label, ",", "")
    if (isRoot(x) || length(heightAttribute) == 0) return (label)
    # edge length = height of the parent minus height of the node
    edge <- GetAttribute(x$parent, heightAttribute, ...) - GetAttribute(x, heightAttribute, ...)
    paste0(label, ":", edge)
  }

  # Newick string of the sub-tree below x, without the closing ";"
  subtree <- function(x) {
    if (x$isLeaf) return (nodeLabel(x))
    inner <- paste(sapply(x$children, subtree), collapse = ",")
    paste0("(", inner, ")", nodeLabel(x))
  }

  paste0(subtree(node), ";")
}

================================================
FILE: R/node_conversion_ape.R
================================================
#' Convert a \code{Node} to a phylo object from the ape package.
#'
#' This method requires the ape package to be installed and loaded.
#'
#' @param x The root \code{Node} of the tree or sub-tree to be converted
#' @param heightAttribute The attribute (field name or function) storing the height
#' @param ...
#'   any other argument
#'
#' @examples
#' library(ape)
#' data(acme)
#' acmephylo <- as.phylo(acme)
#' #plot(acmephylo)
#'
#'
#' @family ape phylo conversions
#'
#' @export
as.phylo.Node <- function(x, heightAttribute = DefaultPlotHeight, ...) {
  # the tree is serialised to Newick and parsed by ape;
  # arguments in ... are accepted but not forwarded
  ape::read.tree(text = ToNewick(x, heightAttribute))
}


#' Convert a \code{phylo} object from the ape package to a \code{Node}
#'
#' @param x The phylo object to be converted
#' @param heightName If the phylo contains edge lengths, then they will be converted
#' to a height and stored in a field named according to this parameter (the default is "plotHeight")
#' @param replaceUnderscores if TRUE (the default), then underscores in names are replaced with spaces
#' @param namesNotUnique if TRUE, then the \code{name} of the \code{Node}s will be prefixed with a unique id.
#' This is useful if the children of a parent have non-unique names.
#' @param ... any other parameter to be passed to sub-implementations
#'
#' @examples
#' #which bird families have the max height?
#' library(ape)
#' data(bird.families)
#' bf <- as.Node(bird.families)
#' height <- bf$height
#' t <- Traverse(bf, filterFun = function(x) x$level == 25)
#' Get(t, "name")
#'
#' @family ape phylo conversions
#' @family as.Node
#'
#' @export
as.Node.phylo <- function(x, heightName = "plotHeight", replaceUnderscores = TRUE, namesNotUnique = FALSE, ...)
{

  #find root node: the only number that occurs as a parent but never as a child
  rootNr <- unique(x$edge[,1][!x$edge[,1] %in% x$edge[,2]])

  #names: tips are numbered 1..length(tip.label), inner nodes follow
  nodeNrs <- c(rootNr, unique(x$edge[,2]))
  leafNrs <- seq_along(x$tip.label)
  nms <- x$tip.label
  names(nms) <- leafNrs
  if("node.label" %in% names(x)) {
    nms2 <- x$node.label
  } else {
    # no labels for inner nodes: fall back to their numbers
    nms2 <- (max(leafNrs) + 1):max(nodeNrs)
  }
  names(nms2) <- (max(leafNrs) + 1):max(nodeNrs)

  nms <- c(nms2, nms)

  root <- Node$new(rootNr)

  # Nodes are temporarily named after their phylo number. Created nodes are
  # indexed by that number, so that the parent of each edge is found directly
  # instead of traversing the growing tree once per edge.
  nodeByNr <- new.env(hash = TRUE)
  nodeByNr[[as.character(rootNr)]] <- root
  for (i in seq_len(nrow(x$edge))) {
    e <- x$edge[i,]
    parent <- nodeByNr[[as.character(e[1])]]
    if (is.null(parent)) {
      stop(paste0("Cannot add edge nr ", i, ": parent node ", e[1], " has not been created yet"))
    }
    child <- parent$AddChild(as.character(e[2]))
    nodeByNr[[as.character(e[2])]] <- child
  }

  if (length(x$edge.length) > 0) {
    # assumes that the pre-order of the non-root nodes matches the row order of
    # x$edge - TODO confirm for phylo objects that are not in cladewise order
    t <- Traverse(root, filterFun = isNotRoot)
    Set(t, edgeLength = x$edge.length)
    #try converting edge length to height
    root[[heightName]] <- 0
    ehf <- function(x) x[[heightName]] <- x$parent[[heightName]] - x$edgeLength
    Do(t, ehf)
    # shift all heights such that the smallest non-root height becomes 0
    corr <- min(Get(t, heightName))
    root$Do(function(x) x[[heightName]] <- x[[heightName]] - corr)
    Do(t, function(x) rm("edgeLength", envir = x))
  }

  # replace the temporary numeric names by the labels of the phylo object
  setName <- function(x) {
    if (replaceUnderscores) nm <- stri_replace_all_fixed( nms[[x$name]], "_", " ")
    else nm <- nms[[x$name]]
    if (namesNotUnique) x$name <- paste0(x$name, ": ", nm)
    else x$name <- nm
  }
  root$Do(setName)

  return (root)

}



#' Determine the number a \code{Node} has after conversion to a phylo object
#'
#' Use this function when plotting a Node as a phylo, e.g. to set custom
#' labels to plot.
#'
#' @param x The Node
#' @param type Either "node" (the default) or "edge" (to get the number of the edge from \code{x} to its parent)
#' @return an integer representing the node, or an empty integer vector if \code{x} is not
#' part of the respective numbering (e.g. the edge number of a root)
#'
#' @examples
#' library(ape)
#' library(data.tree)
#' data(acme)
#' ap <- as.phylo(acme)
#' #plot(ap)
#' #nodelabels("IT Dep.", GetPhyloNr(Climb(acme, "IT")))
#' #edgelabels("Good!", GetPhyloNr(Climb(acme, "IT", "Switch to R"), "edge"))
#'
#'
#' @family ape phylo conversions
#'
#' @export
GetPhyloNr <- function(x, type = c("node", "edge")) {
  type <- type[1]
  if (type == "node") {
    # numbering used here: all leaves first, then all inner nodes
    t <- c(Traverse(x$root, filterFun = isLeaf), Traverse(x$root, filterFun = isNotLeaf))
  } else if (type == "edge") {
    # one edge per non-root node
    t <- Traverse(x$root, filterFun = isNotRoot)
  } else {
    stop("Only node or edge allowed as type")
  }
  # vapply always yields a logical vector, also for an empty traversal
  # (e.g. type "edge" on a tree consisting of the root only)
  res <- which(vapply(t, function(z) identical(z, x), logical(1)))
  return (res)
}

================================================
FILE: R/node_conversion_dataframe.R
================================================
#' Convert a \code{data.tree} structure to a \code{data.frame}
#'
#' If a node field contains data of length > 1, then that is converted into a string in the
#' data.frame.
#'
#' @param x The root \code{Node} of the tree or sub-tree to be convert to a data.frame
#' @param ... the attributes to be added as columns of the data.frame. See \code{\link{Get}} for details.
#' If a specific Node does not contain the attribute, \code{NA} is added to the data.frame.
#' @param traversal any of 'pre-order' (the default), 'post-order', 'in-order', 'level', or 'ancestor'. See \code{\link{Traverse}} for details.
#' @param direction when converting to a network, should the edges point from root to children ("climb") or from child to parent ("descend")?
#' @param type when converting type columns, the \code{type} is the discriminator, i.e. an attribute (e.g. field name) of each node
#' @param prefix when converting type columns, the prefix used for the column names. Can be NULL to omit prefixes.
#' @param filterFun a function taking a \code{Node} as an argument. See \code{\link{Traverse}} for details.
#' @param format if \code{FALSE} (the default), then no formatting will be applied. If \code{TRUE}, then the first formatter (if any) along the ancestor
#' path is used for formatting. The value is passed on to \code{\link{Get}}.
#' @param inheritFromAncestors if FALSE, and if the attribute is a field or a method, then only a \code{Node} itself is
#' searched for the field/method. If TRUE, and if the \code{Node} does not contain the attribute, then ancestors are also searched.
#' @param row.names \code{NULL} or a character vector giving the row names for the data frame.
#' Missing values are not allowed.
#' @param optional logical. If \code{TRUE}, setting row names and converting column names
#' (to syntactic names: see make.names) is optional.
#'
#'
#' @examples
#' data(acme)
#' acme$attributesAll
#' as.data.frame(acme, row.names = NULL, optional = FALSE, "cost", "p")
#'
#' ToDataFrameTree(acme, "cost", "p")
#' ToDataFrameNetwork(acme, "cost", "p", direction = "climb")
#' ToDataFrameTable(acme, "cost", "p")
#' ToDataFrameTypeCol(acme)
#'
#' #use the pruneFun:
#' acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order")
#' ToDataFrameTree(acme, "totalCost", pruneFun = function(x) x$totalCost > 300000)
#'
#' #inherit
#' acme$Set(floor = c(1, 2, 3), filterFun = function(x) x$level == 2)
#' as.data.frame(acme, row.names = NULL, optional = FALSE, "floor", inheritFromAncestors = FALSE)
#' as.data.frame(acme, row.names = NULL, optional = FALSE, "floor", inheritFromAncestors = TRUE)
#'
#' #using a function as an attribute:
#' acme$Accounting$Head <- "Mrs. Numright"
#' acme$Research$Head <- "Mr. Stein"
#' acme$IT$Head <- "Mr. Squarehead"
#' ToDataFrameTable(acme, department = function(x) x$parent$name, "name", "Head", "cost")
#'
#' #complex TypeCol
#' acme$IT$Outsource$AddChild("India")
#' acme$IT$Outsource$AddChild("Poland")
#' acme$Set(type = c('company', 'department', 'project', 'project', 'department',
#'                   'project', 'project', 'department', 'program', 'project',
#'                   'project', 'project', 'project'
#'                   )
#'         )
#' print(acme, 'type')
#' ToDataFrameTypeCol(acme, type = 'type')
#'
#' @inheritParams Prune
#'
#' @export
as.data.frame.Node <- function(x,
                               row.names = NULL,
                               optional = FALSE,
                               ...,
                               traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"),
                               pruneFun = NULL,
                               filterFun = NULL,
                               format = FALSE,
                               inheritFromAncestors = FALSE
) {

  traversal <- traversal[1]

  if(!isRoot(x) || length(pruneFun) > 0) {
    #clone s.t. x is root (for pretty level names)
    x <- Clone(x, attributes = TRUE)
    if (length(pruneFun) > 0) Prune(x, pruneFun)
    x$parent <- NULL
  }

  t <- Traverse(x, traversal = traversal, filterFun = filterFun)

  # base::format is called explicitly: the `format` argument of this function
  # may itself be a function (it is handed on to Get), which would otherwise be
  # picked up here and applied to the level names.
  df <- data.frame( levelName = base::format(Get(t, 'levelName')),
                    row.names = row.names,
                    stringsAsFactors = FALSE)

  cols <- list(...)

  if(length(cols) == 0) return (df)
  for (i in seq_along(cols)) {
    col <- cols[[i]]
    # column name: the argument name if given, otherwise the attribute name
    if (length(names(cols)) > 0 && nchar(names(cols)[i]) > 0) colName <- names(cols)[i]
    else if (is.character(col)) colName <- col
    else stop(paste0("Cannot infer column name for ... arg nr ", i))
    if (length(col) > 1) {
      # a vector is used as the column values as-is
      it <- col
    } else {
      it <- Get(t, col, format = format, inheritFromAncestors = inheritFromAncestors, simplify = FALSE)
      # reduce every collected value to a scalar, so that unlist() below yields
      # exactly one entry per node
      it <- lapply(it, function(el) {
        if (inherits(el, "Node")) return ("")
        else if (length(unlist(el)) > 1) return (toString(unlist(el)))
        else if (length(unlist(el)) == 0) return (NA)
        else return (el)
      }
      )
    }

    df[colName] <- unlist(it)

  }

  return (df)

}


#' @rdname as.data.frame.Node
#' @return ToDataFrameTree: a \code{data.frame}, where each row represents a \code{Node} in the tree or sub-tree
#' spanned by \code{x}, possibly pruned according to \code{pruneFun}.
#'
#' @export
ToDataFrameTree <- function(x, ..., pruneFun = NULL) {
  as.data.frame(x, row.names = NULL, optional = FALSE, ..., pruneFun = pruneFun)
}

#' @rdname as.data.frame.Node
#'
#'
#' @return ToDataFrameTable: a \code{data.frame}, where each row represents a leaf \code{Node} in the tree or sub-tree
#' spanned by \code{x}, possibly pruned according to \code{pruneFun}.
#'
#'
#' @export
ToDataFrameTable <- function(x, ..., pruneFun = NULL) {
  df <- as.data.frame(x, row.names = NULL, optional = FALSE, ..., filterFun = isLeaf, pruneFun = pruneFun, inheritFromAncestors = TRUE)
  # drop the levelName column
  df[,-1]
}


#' @rdname as.data.frame.Node
#'
#' @return ToDataFrameNetwork: a \code{data.frame}, where each row represents a \code{Node} in the tree or sub-tree
#' spanned by \code{x}, possibly pruned according to \code{pruneFun}. The first column is called 'from', while the
#' second is called 'to', describing the parent to child edge (for direction "climb") or the child to parent edge (for direction "descend").
#' If \code{\link{AreNamesUnique}} is TRUE, then the Network is
#' based on the \code{Node$name}, otherwise on the \code{Node$pathString}
#'
#'
#' @export
ToDataFrameNetwork <- function(x,
                               ...,
                               direction = c("climb", "descend"),
                               pruneFun = NULL,
                               format = FALSE,
                               inheritFromAncestors = FALSE) {

  direction <- direction[1]

  # identify nodes by name where that is unambiguous, else by their full path
  if (AreNamesUnique(x)) {
    GetName <- function(x) x$name
  } else {
    GetName <- function(x) x$pathString
  }

  nodes <- Traverse(x, traversal = "level", pruneFun = pruneFun)
  childNames <- Get(nodes, function(x) GetName(x))
  parentNames <- Get(nodes, function(x) GetName(x$parent))

  if (direction == "descend") {
    edges <- data.frame(from = childNames, to = parentNames, stringsAsFactors = FALSE)
  } else if (direction == "climb") {
    edges <- data.frame(from = parentNames, to = childNames, stringsAsFactors = FALSE)
  } else {
    stop(paste0("direction ", direction, " unknown. Must be either climb or descend."))
  }

  # attribute columns, in the same (level) order as the edges; levelName is dropped
  attributeCols <- ToDataFrameTree(x,
                                   ...,
                                   traversal = "level",
                                   pruneFun = pruneFun,
                                   format = format,
                                   inheritFromAncestors = inheritFromAncestors)[, -1, drop = FALSE]

  edges <- cbind(edges, attributeCols)
  # the first row belongs to the root, which has no incoming edge
  edges <- edges[-1, ]
  rownames(edges) <- seq_len(nrow(edges))
  edges
}


#' @rdname as.data.frame.Node
#'
#' @return ToDataFrameTypeCol: a \code{data.frame} in table format (i.e. where each row represents a leaf in the tree or sub-tree
#' spanned by \code{x}), possibly pruned according to \code{pruneFun}. In addition to \code{...}, each distinct
#' \code{type} is output to a column.
#'
#'
#' @export
ToDataFrameTypeCol <- function(x,
                               ...,
                               type = 'level',
                               prefix = type,
                               pruneFun = NULL) {
  # one column per distinct value of `type`, inner nodes first
  cols <- unique(c(x$Get(type, filterFun = isNotLeaf), x$Get(type, filterFun = isLeaf)))

  pathArgs <- GetPathArgV(cols, type)
  if (is.null(prefix)) names(pathArgs) <- as.character(cols)
  else names(pathArgs) <- paste0(prefix, '_', cols)

  # pruneFun is forwarded explicitly; it used to be accepted but silently ignored
  do.call(ToDataFrameTable, c(x, pathArgs, ..., list(pruneFun = pruneFun)))
}


# Builds a function that, for a given leaf, returns the name of the ancestor
# (or the leaf itself) whose `type` attribute equals n, or NA if there is none.
GetPathArg <- function(n, type) {
  lvl <- force(n)
  f <- function(leaf) {
    path <- leaf$Get(type, traversal = 'ancestor')
    name <- names(path[path == lvl])
    if (length(name) == 0) name <- NA
    return (name)
  }
  return (f)
}

# Vectorized over n: yields one lookup function per type value
GetPathArgV <- Vectorize(GetPathArg, vectorize.args = 'n')


#' Convert a \code{data.frame} to a \code{data.tree} structure
#'
#' @param x The data.frame in the required format.
#' @param ... Any other argument implementations of this might need
#' @param mode Either "table" (if x is a data.frame in tree or table format) or "network"
#' @param na.rm If \code{TRUE}, then NA's are treated as NULL and values will not be set on nodes
#'
#' @return The root \code{Node} of the \code{data.tree} structure
#'
#' @examples
#' data(acme)
#'
#' #Tree
#' x <- ToDataFrameTree(acme, "pathString", "p", "cost")
#' x
#' xN <- as.Node(x)
#' print(xN, "p", "cost")
#'
#' #Table
#' x <- ToDataFrameTable(acme, "pathString", "p", "cost")
#' x
#' xN <- FromDataFrameTable(x)
#' print(xN, "p", "cost")
#'
#' #More complex Table structure, using colLevels
#' acme$Set(floor = c(1, 2, 3), filterFun = function(x) x$level == 2)
#' x <- ToDataFrameTable(acme, "pathString", "floor", "p", "cost")
#' x
#' xN <- FromDataFrameTable(x, colLevels = list(NULL, "floor", c("p", "cost")), na.rm = TRUE)
#' print(xN, "floor", "p", "cost")
#'
#' #Network
#' x <- ToDataFrameNetwork(acme, "p", "cost", direction = "climb")
#' x
#' xN <- FromDataFrameNetwork(x)
#' print(xN, "p", "cost")
#'
#' @seealso \code{\link{as.data.frame.Node}}
#' @family as.Node
#'
#' @export
as.Node.data.frame <- function(x,
                               ...,
                               mode = c("table", "network"),
                               pathName = 'pathString',
                               pathDelimiter = '/',
                               colLevels = NULL,
                               na.rm = TRUE) {

  mode <- mode[1]

  if (mode == 'table') return (FromDataFrameTable(x, pathName, pathDelimiter, colLevels, na.rm))
  else if (mode == 'network') return (FromDataFrameNetwork(x))
  else stop(paste0("Mode ", mode, " unknown."))
}


#' @rdname as.Node.data.frame
#'
#' @param table a \code{data.frame} in table or tree format, i.e. having a row for each leaf (and optionally
#' for additional nodes). There should be a column called \code{pathName}, separated by \code{pathDelimiter},
#' describing the path of each row.
#' @param pathName The name of the column in x containing the path of the row
#' @param pathDelimiter The delimiter used to separate nodes in \code{pathName}
#' @param colLevels Nested list of column names, determining on what node levels the attributes are written to.
#'
#' @inheritParams CheckNameReservedWord
#'
#' @export
FromDataFrameTable <- function(table,
                               pathName = 'pathString',
                               pathDelimiter = '/',
                               colLevels = NULL,
                               na.rm = TRUE,
                               check = c("check", "no-warn", "no-check")
                               ) {
  if (!is(table, "data.frame")) stop("table must be a data.frame")
  #make sure tibble etc. work (#115)
  table <- as.data.frame(table)

  table[[pathName]] <- as.character(table[[pathName]])
  root <- NULL
  # attribute columns: everything except the path column and reserved names
  mycols <- names(table)[ !(names(table) %in% c(NODE_RESERVED_NAMES_CONST, pathName)) ]

  # seq_len (not 1:nrow) so that an empty table yields no iterations;
  # in that case no node is ever created and NULL is returned
  for (i in seq_len(nrow(table))) {
    myrow <- table[ i, , drop = FALSE]
    mypath <- myrow[[pathName]]

    #create node and ancestors if necessary (might already have been created)
    paths <- strsplit(mypath, pathDelimiter, fixed = TRUE)[[1]]
    paths <- paths[paths != ""]
    # the first path element of the first row names the root
    if (is.null(root)) root <- Node$new(paths[1], check)
    mynode <- root
    colsToSet <- mycols
    colsToSetForLeaf <- mycols

    for (path in paths[-1]) {
      path <- CheckNameReservedWord(path, check)
      child <- Climb(mynode, path)

      if (is.null(child)) {
        mynode <- mynode$AddChild(path)
      } else {
        mynode <- child
      }

      if (length(colLevels) >= mynode$level) {
        colsToSet <- intersect(colLevels[[mynode$level]], mycols)

        #fill values on appropriate level
        for (mycol in colsToSet) {
          if ( !( na.rm && is.na(myrow[[mycol]]) )) {
            mynode[[mycol]] <- myrow[[mycol]]
          }
        }
        # columns consumed by an inner level are not repeated on the leaf
        colsToSetForLeaf <- colsToSetForLeaf[!(colsToSetForLeaf %in% colsToSet)]
      }
    }

    #put the rest in the leaf
    for (mycol in colsToSetForLeaf) {
      if ( !( na.rm && is.na(myrow[[mycol]]) )) {
        mynode[[mycol]] <- myrow[[mycol]]
      }
    }
  }
  return (root)
}


#' @rdname as.Node.data.frame
#'
#' @param network A \code{data.frame} in network format, i.e.
#' it must adhere to the following requirements:
#' \itemize{
#'  \item{It must contain as many rows as there are nodes (excluding the root, there is no row for the root)}
#'  \item{Its first and second columns contain the network relationships.
#'  This can be either climbing (from parent to children) or descending (from child to parent)}
#'  \item{Its subsequent columns contain the attributes to be set on the nodes}
#'  \item{It must contain a single root}
#'  \item{There are no cycles in the network}
#' }
#'
#' @import methods
#'
#' @inheritParams CheckNameReservedWord
#'
#' @export
FromDataFrameNetwork <- function(network, check = c("check", "no-warn", "no-check")) {

  if (!is(network, "data.frame")) stop("network must be a data.frame")
  #make sure tibble etc. work (#115)
  network <- as.data.frame(network)

  nCols <- dim(network)[2]
  if (nCols < 2) stop("network must hold the relationships in the first two columns")

  # Guess the edge direction: the column with more distinct values is taken
  # to hold the children.
  firstIsChild <- length(unique(network[ , 1])) > length(unique(network[ , 2]))
  if (firstIsChild) {
    children <- network[ , 1]
    parents <- network[ , 2]
  } else {
    children <- network[ , 2]
    parents <- network[ , 1]
  }

  # the root is the only parent that never shows up as a child
  rootName <- unique(parents[!(parents %in% children)])
  if (length(rootName) != 1) stop("Cannot find root name. network is not a tree!")
  root <- Node$new(rootName, check)

  # columns 3.. carry node attributes (empty if there are only two columns)
  attributeCols <- seq_len(nCols)[-(1:2)]

  # Recursively attach all rows whose parent is `node`
  AddChildren <- function(node) {
    for (idx in which(parents == node$name)) {
      child <- node$AddChild(children[idx])
      for (j in attributeCols) {
        vlu <- network[idx, j]
        if (is.na(vlu)) next
        nm <- CheckNameReservedWord(names(network)[j], check)
        if (!nm %in% NODE_RESERVED_NAMES_CONST) child[[nm]] <- vlu
      }
      AddChildren(child)
    }
  }

  AddChildren(root)
  root
}

================================================
FILE: R/node_conversion_dendrogram.R
================================================
#' Convert a \code{\link{dendrogram}} to a data.tree \code{Node}
#'
#' @param x The dendrogram
#' @param name The name of the root Node
#' @param heightName The name under which the dendrogram's height is stored
#' @param ...
#' Additional parameters
#'
#' @return The root \code{Node} of a \code{data.tree}
#'
#' @examples
#' hc <- hclust(dist(USArrests), "ave")
#' dend1 <- as.dendrogram(hc)
#' tree1 <- as.Node(dend1)
#' tree1$attributesAll
#' tree1$totalCount
#' tree1$leafCount
#' tree1$height
#'
#' @family as.Node
#'
#' @inheritParams CheckNameReservedWord
#'
#' @export
as.Node.dendrogram <- function(x, name = "Root", heightName = "plotHeight", check = c("check", "no-warn", "no-check"), ...) {
  #str(unclass(dend1))

  # Name resolution: leaves use their label, inner nodes their edgetext if
  # present; otherwise a throw-away unique name is generated (inner children
  # are renamed to their position by the parent below).
  if (is.leaf(x)) {
    name <- attr(x, 'label')
  } else if (is.null(name) && is.null(attr(x, "edgetext"))) {
    name <- tempfile(pattern = '', tmpdir = '')
  } else if (!is.null(attr(x, "edgetext"))) {
    name <- attr(x, "edgetext")
  }

  n <- Node$new(name, check)

  # copy all dendrogram attributes that do not clash with R or data.tree internals
  reserved <- c('label', 'class', 'comment', 'dim', 'dimnames', 'names', 'row.names', 'tsp', NODE_RESERVED_NAMES_CONST)
  ats <- names(attributes(x))
  for (a in ats[!(ats %in% reserved)]) {
    n[[a]] <- attr(x, a)
  }

  n[[heightName]] <- attr(x, "height")

  if (!is.leaf(x)) {
    for (i in seq_along(x)) {
      # heightName and check must be handed down explicitly, otherwise the
      # sub-tree silently falls back to the defaults
      childNode <- as.Node.dendrogram(x[[i]],
                                      name = NULL,
                                      heightName = heightName,
                                      check = check,
                                      ...)
      n$AddChildNode(childNode)
      if (!is.leaf(x[[i]])) {
        name <- as.character(childNode$position)
        childNode$name <- name
      }
    }
  } else {
    n$value <- as.vector(x)
  }
  return (n)
}


#' Convert a \code{Node} to a \code{dendrogram}
#'
#' Convert a \code{data.tree} structure to a \code{\link{dendrogram}}
#'
#' @param object The Node to convert
#' @param heightAttribute The attribute (field name or function) storing the height
#' @param edgetext If TRUE, then for non-leaf nodes the node name is stored as the dendrogram's edge text.
#' @param ...
#' Additional parameters
#'
#' @return An object of class dendrogram
#'
#' @examples
#' data(acme)
#' acmed <- as.dendrogram(acme)
#' plot(acmed, center = TRUE)
#'
#' #you can take an attribute for the height:
#' acme$Do( function(x) x$myPlotHeight <- (10 - x$level))
#' acmed <- as.dendrogram(acme, heightAttribute = "myPlotHeight")
#' plot(acmed, center = TRUE)
#'
#' #or directly a function
#' acmed <- as.dendrogram(acme, heightAttribute = function(x) 10 - x$level)
#' plot(acmed)
#'
#' @family Conversions from Node
#'
#' @import stats
#' @export
as.dendrogram.Node <- function(object, heightAttribute = DefaultPlotHeight, edgetext = FALSE, ...) {
  node <- object

  #strange: the original dendrogram will
  # unclass the nested dendrograms as well,
  # while ours won't?
  #
  # hc <- hclust(dist(USArrests), "ave")
  # dend1 <- as.dendrogram(hc)
  # node <- as.Node(dend1)
  # dend2 <- as.dendrogram(node)
  # unclass(dend1)
  # unclass(dend2)

  height <- as.vector(GetAttribute(node, heightAttribute))

  if (node$isLeaf) {
    # a leaf carries its value (0 if none was stored) plus the leaf attributes
    res <- node$value
    if (is.null(res)) res <- 0
    res <- structure(res,
                     label = node$name,
                     members = 1,
                     height = height,
                     leaf = node$isLeaf,
                     class = "dendrogram")
  } else {
    # edgetext is passed down explicitly so that every non-leaf node of the
    # sub-tree gets its edge text, not only the node the conversion started on
    res <- unname(lapply(node$children,
                         FUN = function(x) as.dendrogram(x, heightAttribute, edgetext = edgetext, ...)))
    res <- structure(res,
                     members = node$leafCount,
                     midpoint = node$midpoint,
                     height = height,
                     class = "dendrogram")
    if (edgetext) attr(res, "edgetext") <- node$name
  }

  return (res)
}

================================================
FILE: R/node_conversion_igraph.R
================================================
#' Convert a \code{data.tree} structure to an igraph network
#'
#' This requires the igraph package to be installed.
#' Also, this requires the names of the \code{Nodes} to be unique within
#' the \code{data.tree} structure.
#'
#' @param x The root \code{Node} to convert
#' @param vertexAttributes A vector of strings, representing the attributes
#' in the \code{data.tree} structure to add as attributes to the vertices of the igraph
#' @param edgeAttributes A vector of strings, representing the attributes
#' in the \code{data.tree} structure to add as edge attributes of the igraph
#' @param ... Currently unused.
#'
#' @inheritParams igraph::graph_from_data_frame
#' @inheritParams ToDataFrameNetwork
#'
#' @return an \code{igraph} object
#'
#' @examples
#' data(acme)
#' library(igraph)
#' ig <- as.igraph(acme, "p", c("level", "isLeaf"))
#' plot(ig)
#'
#' @seealso AreNamesUnique
#'
#' @export
as.igraph.Node <- function(x, vertexAttributes = character(), edgeAttributes = character(), directed = FALSE, direction = c("climb", "descend"), ...) {
  if (!AreNamesUnique(x)) stop("Node names must be unique within the tree")

  # Reduce the default choice vector to one value *before* splicing it into
  # the argument list: c(..., direction = c("climb", "descend")) would create
  # arguments named direction1/direction2, which ToDataFrameNetwork would
  # treat as additional attribute columns instead of as the direction.
  direction <- direction[1]

  network <- do.call("ToDataFrameNetwork", c(x, "name", vertexAttributes, edgeAttributes, direction = direction))
  # edges: from/to plus the requested edge attributes
  data <- network[,c("from", "to", edgeAttributes)]
  # vertices: name plus the requested vertex attributes (levelName dropped)
  vert <- do.call("ToDataFrameTree", c(x, "name", vertexAttributes))[,-1]
  ig <- igraph::graph_from_data_frame(data,
                                      directed = directed,
                                      vertices = vert)
  return (ig)
}

================================================
FILE: R/node_conversion_list.R
================================================
#' Convert a nested \code{list} structure to a \code{data.tree} structure
#'
#' @param x The \code{list} to be converted.
#' @param mode How the list is structured. "simple" (the default) will interpret any list to be a child. "explicit"
#' assumes that children are in a nested list called \code{childrenName}
#' @param nameName The name of the element in the list that should be used as the name, can be NULL if mode = explicit and
#' the children lists are named, or if an automatic name (running number) should be assigned
#' @param childrenName The name of the element that contains the child list (applies to mode 'explicit' only).
#' @param nodeName A name suggestion for x, if the name cannot be inferred otherwise. This is for example the case for
#' the root with mode explicit and named lists.
#' @param interpretNullAsList If \code{TRUE}, then \code{NULL}-valued lists are interpreted as child nodes. Else, they are interpreted as attributes.
#' This has only an effect if \code{mode} is "simple".
#' @param ... Any other argument to be passed to generic sub implementations
#'
#' @examples
#' kingJosephs <- list(name = "Joseph I",
#'                     spouse = "Mary",
#'                     born = "1818-02-23",
#'                     died = "1839-08-29",
#'                     children = list(
#'                                     list(name = "Joseph II",
#'                                          spouse = "Kathryn",
#'                                          born = "1839-03-28",
#'                                          died = "1865-12-19"),
#'                                     list(name = "Helen",
#'                                          born = "1840-17-08",
#'                                          died = "1845-01-01")
#'                                     )
#'                    )
#' FromListExplicit(kingJosephs)
#'
#' kingJosephs <- list(head = "Joseph I",
#'                     spouse = "Mary",
#'                     born = "1818-02-23",
#'                     died = "1839-08-29",
#'                     list(head = "Joseph II",
#'                          spouse = "Kathryn",
#'                          born = "1839-03-28",
#'                          died = "1865-12-19"),
#'                     list(head = "Helen",
#'                          born = "1840-17-08",
#'                          died = "1845-01-01")
#'                    )
#' FromListSimple(kingJosephs, nameName = "head")
#'
#' kingJosephs <- list(spouse = "Mary",
#'                     born = "1818-02-23",
#'                     died = "1839-08-29",
#'                     `Joseph II` = list(spouse = "Kathryn",
#'                                        born = "1839-03-28",
#'                                        died = "1865-12-19"),
#'                     Helen = list(born = "1840-17-08",
#'                                  died = "1845-01-01")
#'
#'                    )
#' FromListSimple(kingJosephs, nodeName = "Joseph I")
#'
#' @inheritParams CheckNameReservedWord
#' @family as.Node
#'
#' @export
as.Node.list <- function(x, mode = c("simple", "explicit"), nameName = "name", childrenName = "children", nodeName = NULL, interpretNullAsList = FALSE, check = c("check", "no-warn", "no-check"), ...) {
  mode <- mode[1]
  check <- check[1]

  #find my name
  if (is.null(nameName) || !(nameName %in% names(x))) {
    if (length(nodeName)==0) myName <- "Root"
    else myName <- nodeName
  } else {
    myName <- x[[nameName]]
  }

  n <- Node$new(as.character(myName), check = check)

  #set attributes

  #find attributes that need importing
  attributes <- names(x)

  #capture attributes without names
  if (is.null(attributes) && length(x) !=0) {
    attributes <- rep("", length(x))
  }

  # field_nums, unnamed_attributes and attributes are kept in lock-step:
  # position in x, "needs a generated name" flag, and field name
  field_nums <- seq_along(x)
  unnamed_attributes <- attributes == "" & !vapply(x, is.list, logical(1))

  #exclude nameName
  if (!is.null(nameName)) {
    field_nums <- field_nums[attributes != nameName]
    unnamed_attributes <- unnamed_attributes[attributes != nameName]
    attributes <- attributes[attributes != nameName]
  }

  #exclude childrenName if explicit
  if (mode == "explicit") {
    field_nums <- field_nums[attributes != childrenName]
    unnamed_attributes <- unnamed_attributes[attributes != childrenName]
    attributes <- attributes[attributes != childrenName]
  }

  # unnamed non-list fields are stored under a running number
  attributes[unnamed_attributes] <- seq_along(which(unnamed_attributes))

  if (check != "no-check") {
    fieldNameIsReserved <- (attributes %in% NODE_RESERVED_NAMES_CONST) & !(attributes %in% c(nameName, childrenName))
    # collapse (not sep) is required to join the offending names into a
    # single comma separated list within one warning message
    if (any(fieldNameIsReserved) && (check != "no-warn")) warning(paste0("The following names are data.tree reserved words and will be appended with 2: ", paste(attributes[fieldNameIsReserved], collapse = ", "), "." ))
  }

  for (i in seq_along(field_nums)) {
    v <- x[[field_nums[i]]]

    if (mode == 'simple' && inherits(v, 'list')) {
      #any list is interpreted as child, so don't store
    } else {
      fieldNm <- attributes[i]
      if (fieldNm %in% NODE_RESERVED_NAMES_CONST) fieldNm <- paste0(fieldNm, "2")
      n[[fieldNm]] <- v
    }
  }

  #children
  # a plain character vector cannot hold children
  if (is.character(x)) return (n)
  if (mode == 'simple') {
    if (interpretNullAsList) children <- x[vapply(x, function(y) is.list(y) || is.null(y), logical(1))]
    else children <- x[vapply(x, is.list, logical(1))]
  }
  else if (mode == 'explicit') children <- x[[childrenName]]

  if (length(children) == 0) return (n)

  for (i in seq_along(children)) {
    # name suggestion for the child: duplicated names are unusable, so fall
    # back to the running number in that case
    if (any(duplicated(names(children)))) {
      childName <- ""
    } else if (is.character(children)) {
      childName <- children[i]
    } else if (!is.null(names(children))) {
      childName <- names(children)[i]
    } else {
      childName <- ""
    }
    if (nchar(childName) == 0) childName <- i
    child <- children[[i]]
    childNode <- as.Node.list(child, mode, nameName, childrenName, nodeName = childName, interpretNullAsList = interpretNullAsList, check = check, ...)
    n$AddChildNode(childNode)
  }

  return (n)
}


#' @rdname as.Node.list
#'
#' @param explicitList A \code{list} in which children are in a separate nested list called \code{childrenName}.
#'
#' @export
FromListExplicit <- function(explicitList, nameName = "name", childrenName = "children", nodeName = NULL, check = c("check", "no-warn", "no-check")) {
  as.Node.list(explicitList, mode = "explicit", nameName = nameName, childrenName = childrenName, nodeName = nodeName, check = check)
}


#' @rdname as.Node.list
#'
#' @param simpleList A \code{list} in which children are stored as nested list alongside other attributes.
#' Any list is
#' interpreted as a child \code{Node}
#'
#' @export
FromListSimple <- function(simpleList, nameName = "name", nodeName = NULL, interpretNullAsList = FALSE, check = c("check", "no-warn", "no-check")) {
  as.Node.list(simpleList, mode = "simple", nameName = nameName, nodeName = nodeName, interpretNullAsList = interpretNullAsList, check = check)
}


#' Convert a \code{data.tree} structure to a list-of-list structure
#'
#' @param x The Node to convert
#' @param mode How the list is structured. "simple" (the default) will add children directly as nested lists.
#' "explicit" puts children in a separate nested list called \code{childrenName}
#' @param unname If TRUE, and if \code{mode} is "explicit", then the nested children list will not have named arguments. This
#' can be useful e.g. in the context of conversion to JSON, if you prefer the children to be
#' an array rather than named objects.
#' @param nameName The name that should be given to the name element
#' @param childrenName The name that should be given to the children nested list
#' @param rootName The name of the node. If provided, this overrides \code{Node$name}
#' @param keepOnly A character vector of attributes to include in the result. If \code{NULL} (the default), all attributes are kept.
#' @param ... Additional parameters passed to \code{as.list.Node}
#'
#' @examples
#' data(acme)
#'
#' str(ToListSimple(acme))
#' str(ToListSimple(acme, keepOnly = "cost"))
#'
#' str(ToListExplicit(acme))
#' str(ToListExplicit(acme, unname = TRUE))
#' str(ToListExplicit(acme, unname = TRUE, nameName = "id", childrenName = "descendants"))
#'
#' @inheritParams Prune
#'
#' @export
as.list.Node <- function(x,
                         mode = c("simple", "explicit"),
                         unname = FALSE,
                         nameName = ifelse(unname, "name", ""),
                         childrenName = 'children',
                         rootName = '',
                         keepOnly = NULL,
                         pruneFun = NULL,
                         ...) {
  mode <- mode[1]
  self <- x
  res <- list()

  # The name element is written if a nameName or rootName was given, or if x
  # is a root; it is stored under "name" when nameName is empty.
  myname <- if (nchar(rootName) != 0) rootName else x$name
  if (nchar(nameName) != 0 || nchar(rootName) != 0 || isRoot(x)) {
    l_nameName <- nameName
    if (nchar(nameName) == 0) l_nameName <- "name"
    res[l_nameName] <- myname
  }

  # attribute names of this node, optionally restricted to keepOnly
  attributes <- self$attributes
  if (!is.null(keepOnly) && !all(is.na(attributes))) attributes <- attributes[attributes %in% keepOnly]

  for (attributeName in attributes) res[[attributeName]] <- self[[attributeName]]

  if (!self$isLeaf) {
    children <- self$children
    if (length(pruneFun) > 0) {
      # pruneFun returns TRUE for the children to keep
      filter <- unlist(lapply(children, pruneFun))
      children <- children[filter]
    }
    kids <- lapply(children, FUN = function(x) as.list.Node(x, mode, unname, nameName, childrenName, keepOnly = keepOnly, pruneFun = pruneFun, ...))
    if (mode == "explicit") {
      res[[childrenName]] <- kids
      if (unname) res[[childrenName]] <- unname(res[[childrenName]])
    } else if (mode == "simple") {
      res <- c(res, kids)
    } else {
      stop(paste0("Mode ", mode, " unknown"))
    }
  }
  return(res)
}


#' @rdname as.list.Node
#'
#' @export
ToListSimple <- function(x, nameName = "name", pruneFun = NULL, ...) {
  as.list.Node(x, mode = "simple", nameName = nameName, pruneFun = pruneFun, ...)
}


#' @rdname as.list.Node
#'
#'
#' @export
ToListExplicit <- function(x, unname = FALSE, nameName = ifelse(unname, "name", ""), childrenName = 'children', pruneFun = NULL, ...) {
  as.list.Node(x, mode = "explicit", unname = unname, nameName = nameName, childrenName = childrenName, pruneFun = pruneFun, ...)
}

================================================
FILE: R/node_conversion_party.R
================================================
#' Convert a \code{SplitNode} from the party package to a \code{data.tree} structure.
#'
#' @param x The BinaryTree
#' @param ...
#' additional arguments (unused)
#'
#' @examples
#' library(party)
#' airq <- subset(airquality, !is.na(Ozone))
#' airct <- ctree(Ozone ~ ., data = airq,
#'                controls = ctree_control(maxsurrogate = 3))
#'
#' tree <- as.Node(airct)
#' tree
#'
#' print(tree,
#'       "label",
#'       criterion = function(x) round(x$criterion$maxcriterion, 3),
#'       statistic = function(x) round(max(x$criterion$statistic), 3)
#'       )
#'
#' FindNode(tree, 6)$path
#'
#'
#' @export
#'
as.Node.BinaryTree <- function(x, ...) {
  CreateNodeFromSplittingNode(x@tree)
}


# Recursively turns a party splitting node into a Node. `left` tells whether
# the splitting node is the left branch of its parent; it is only used for
# the label.
CreateNodeFromSplittingNode <- function(splittingNode, left = TRUE) {
  node <- Node$new(splittingNode$nodeID,
                   weights = splittingNode$weights,
                   criterion = splittingNode$criterion,
                   psplit = splittingNode$psplit,
                   ssplit = splittingNode$ssplit,
                   label = GetSplittingNodeLabel(splittingNode, left))

  # terminal nodes have no branches to descend into
  if (splittingNode$terminal) return (node)

  node$AddChildNode( CreateNodeFromSplittingNode(splittingNode$left) )
  node$AddChildNode( CreateNodeFromSplittingNode(splittingNode$right, left = FALSE) )
  node
}


# Label of a splitting node: the weight sum for terminal nodes, the textual
# split condition otherwise.
GetSplittingNodeLabel <- function(splittingNode, left) {
  if (!splittingNode$terminal) {
    return (as.character.orderedSplit(splittingNode$psplit, left))
  }
  paste0("weights = ", sum(splittingNode$weights))
}


# Textual form of an ordered split: "<variable> <= <splitpoint>" for the left
# branch, "<variable> > <splitpoint>" for the right one.
as.character.orderedSplit <- function(x, left = TRUE, ...) {
  splitLevels <- attr(x$splitpoint, "levels")
  sp <- if (is.null(splitLevels)) x$splitpoint else splitLevels[x$splitpoint]

  # toleft, if set, is compared against the requested side
  if (!is.null(x$toleft)) left <- as.logical(x$toleft) == left

  operator <- if (left) " <= " else " > "
  paste0(x$variableName, operator, sp)
}


#' Convert a \code{party} from the partykit package to a \code{data.tree} structure.
#'
#' @param x The party object
#' @param ...
#' other arguments (unused)
#'
#' @examples
#' library(partykit)
#' data("WeatherPlay", package = "partykit")
#' ### splits ###
#' # split in overcast, humidity, and windy
#' sp_o <- partysplit(1L, index = 1:3)
#' sp_h <- partysplit(3L, breaks = 75)
#' sp_w <- partysplit(4L, index = 1:2)
#'
#' ## query labels
#' character_split(sp_o)
#'
#' ### nodes ###
#' ## set up partynode structure
#' pn <- partynode(1L, split = sp_o, kids = list(
#'   partynode(2L, split = sp_h, kids = list(
#'       partynode(3L, info = "yes"),
#'       partynode(4L, info = "no"))),
#'   partynode(5L, info = "yes"),
#'   partynode(6L, split = sp_w, kids = list(
#'       partynode(7L, info = "yes"),
#'       partynode(8L, info = "no")))))
#' pn
#' ### tree ###
#' ## party: associate recursive partynode structure with data
#' py <- party(pn, WeatherPlay)
#' tree <- as.Node(py)
#'
#' print(tree,
#'       "splitname",
#'       count = function(node) nrow(node$data),
#'       "splitLevel")
#'
#' SetNodeStyle(tree,
#'              label = function(node) paste0(node$name, ": ", node$splitname),
#'              tooltip = function(node) paste0(nrow(node$data), " observations"),
#'              fontname = "helvetica")
#' SetEdgeStyle(tree,
#'              arrowhead = "none",
#'              label = function(node) node$splitLevel,
#'              fontname = "helvetica",
#'              penwidth = function(node) 12 * nrow(node$data)/nrow(node$root$data),
#'              color = function(node) {
#'                paste0("grey",
#'                       100 - as.integer( 100 * nrow(node$data)/nrow(node$root$data))
#'                       )
#'              }
#'              )
#' Do(tree$leaves,
#'    function(node) {
#'      SetNodeStyle(node,
#'                   shape = "box",
#'                   color = ifelse(node$splitname == "yes", "darkolivegreen4", "lightsalmon4"),
#'                   fillcolor = ifelse(node$splitname == "yes", "darkolivegreen1", "lightsalmon"),
#'                   style = "filled,rounded",
#'                   penwidth = 2
#'                   )
#'    }
#'    )
#'
#' plot(tree)
#'
#'
#' @export
as.Node.party <- function(x, ...) {

  tree <- FromParty(x, x$node)

  # each non-root node is labelled with the split level of its parent that
  # corresponds to its own position among the siblings
  tree$Do(function(node) node$splitLevel <- node$parent$splitlevels[node$position],
          filterFun = isNotRoot)

  return (tree)
}


# Recursively builds a Node for `partynode`, taking the data that belongs to
# it from the matching sub-party of `party`.
FromParty <- function(party, partynode) {
  stopifnot(inherits(party, "party"))
  node <- Node$new(partynode$id)

  for (childnode in partynode$kids) {
    childid <- childnode$id
    # sub-parties are looked up by the node id as a string
    childparty <- party[[as.character(childid)]]
    node$AddChildNode(FromParty(childparty, childnode))
  }

  node$data <- party$data
  node$fitted <- party$fitted
  node$partyinfo <- party$info
  node$nodeinfo <- partynode$info
  node$terms <- party$terms
  node$split <- partynode$split

  formatInfo <- partykit::formatinfo_node(partynode)
  if (length(partynode) > 0) {
    # inner node: describe the split
    csplit <- partykit::character_split(partynode$split, party$data)
    node$splitlevels <- csplit$levels
    node$splitname <- csplit$name
  } else if (identical(nchar(formatInfo) > 0, TRUE)) {
    # terminal node: fall back to the formatted node info, if there is any
    node$splitname <- formatInfo
  }
  return (node)
}

================================================
FILE: R/node_conversion_rpart.R
================================================
#' Convert an \code{\link{rpart}} object to a \code{data.tree} structure
#'
#' @param x the \code{rpart} object to be converted
#' @param digits the number of digits to be used for numeric values in labels
#' @param use.n logical. Add cases to labels, see \code{\link{text.rpart}} for further
#' information
#' @param ... any other argument to be passed to generic sub implementations
#'
#' @return a \code{data.tree} object. The tree contains a field \code{rpart.id} which
#' references back to the original node id in the row names of the \code{rpart} object.
#' @export
#'
#' @examples
#' if (require(rpart)) {
#'    fit <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis)
#'    as.Node(fit)
#' }
#' @family as.Node
as.Node.rpart <- function(x, digits = getOption("digits") - 3, use.n = FALSE, ...) {
  frame <- x$frame
  ylevels <- attr(x, "ylevels")
  # rpart numbers its nodes in the row names of the frame
  nodes <- as.numeric(rownames(frame))
  # rpart marks terminal nodes with the level "<leaf>" in frame$var
  leaves <- frame$var == "<leaf>"

  # leaf labels are produced by the text function of the fitted method
  leaf_labels <- x$functions$text(
    yval = if (is.null(frame$yval2)) frame$yval[leaves] else frame$yval2[leaves, ],
    dev = frame$dev[leaves],
    wt = frame$wt[leaves],
    ylevel = ylevels,
    digits = digits,
    n = frame$n[leaves],
    use.n = use.n)

  # labels for all nodes, indexed by the rpart node id
  node_labels <- setNames(c(labels(x)[which(!leaves) + 1L], leaf_labels),
                          c(nodes[!leaves], nodes[leaves]))

  # the parent of node k is floor(k / 2); the root (first row) has no edge
  network_df <- data.frame(from = node_labels[as.character(floor(nodes[-1L] / 2L))],
                           to = node_labels[as.character(nodes[-1L])],
                           rpart.id = nodes[-1L])

  tree <- FromDataFrameNetwork(network_df)
  tree$rpart.id <- nodes[1L]
  tree
}

================================================
FILE: R/node_methods.R
================================================
#
# These are the methods that would normally sit on Node
# However, to reduce the memory footprint of the Node object,
# we only support traditional R methods.
# The first argument of all these methods is node


#' Print a \code{Node} in a human-readable fashion.
#'
#' @param x The Node
#' @param ... Node attributes to be printed. Can be either a character (i.e. the name of a Node field),
#' a Node method, or a function taking a Node as a single argument. See \code{Get} for details on
#' the meaning of \code{attribute}.
#' @param pruneMethod The method can be used to prune for printing in a simple way. If NULL, the entire tree is displayed. If
#' "simple", then only the first \code{limit} nodes are displayed. If "dist", then Nodes are removed
#' everywhere in the tree, according to their level. If pruneFun is provided, then pruneMethod is ignored.
#' @param limit The maximum number of nodes to print. Can be \code{NULL} if the
#' entire tree should be printed.
#' @param row.names If \code{TRUE} (default), then the row names are printed out. Else, they are not.
#'
#' @inheritParams ToDataFrameTree
#'
#' @examples
#' data(acme)
#' print(acme, "cost", "p")
#' print(acme, "cost", probability = "p")
#' print(acme, expectedCost = function(x) x$cost * x$p)
#' do.call(print, c(acme, acme$attributesAll))
#'
#' tree <- CreateRegularTree(4, 5)
#' # print entire tree:
#' print(tree, pruneMethod = NULL)
#' # print first 20 nodes:
#' print(tree, pruneMethod = "simple", limit = 20)
#' # print 20 nodes, removing leafs first:
#' print(tree, pruneMethod = "dist", limit = 20)
#' # provide your own pruning function:
#' print(tree, pruneFun = function(node) node$position != 2)
#'
#'
#' @export
print.Node <- function(x,
                       ...,
                       pruneMethod = c("simple", "dist", NULL),
                       limit = 100,
                       pruneFun = NULL,
                       row.names = TRUE) {
  # an explicit pruneFun takes precedence over the built-in prune methods
  if (length(pruneFun) > 0) pruneMethod <- NULL
  pruneMethod <- pruneMethod[1]

  if (length(pruneMethod) > 0 && length(limit) > 0) {
    if (pruneMethod == "simple") {
      x <- PrintPruneSimple(x, limit = limit)
    } else if (pruneMethod == "dist") {
      x <- PrintPruneDist(x, limit = limit)
    } else {
      stop (paste0("Unknown pruneMethod ", pruneMethod, "!"))
    }
  } else if (!isRoot(x)) {
    #clone s.t. x is root (for pretty level names)
    x <- Clone(x, attributes = TRUE)
    x$parent <- NULL
  }

  df <- ToDataFrameTree(x, format = TRUE, ..., pruneFun = pruneFun)
  print(df, na.print = "", row.names = row.names)
}


#' Aggregate child values of a \code{Node}, recursively.
#'
#' The \code{Aggregate} method lets you fetch an attribute from a \code{Node}'s children, and then aggregate them
#' using \code{aggFun}. For example, you can aggregate cost by summing costs of child \code{Nodes}. This is especially useful in the
#' context of tree traversal, when using post-order traversal mode.
#'
#' As with \code{\link{Get}}, the attribute can be a field, a method or a function. If the attribute on a child
#' is \code{NULL}, \code{Aggregate} is called recursively on its children.
#'
#' @param node the \code{Node} on which to aggregate
#' @param aggFun the aggregation function to be applied to the children's \code{attributes}
#' @param ... any arguments to be passed on to attribute (in case it's a function)
#'
#' @inheritParams Get
#'
#' @examples
#' data(acme)
#'
#' #Aggregate on a field
#' Aggregate(acme, "cost", sum)
#'
#' #This is the same as:
#' HomeRolledAggregate <- function(node) {
#'   sum(sapply(node$children, function(child) {
#'     if (!is.null(child$cost)) child$cost
#'     else HomeRolledAggregate(child)
#'   }))
#' }
#' HomeRolledAggregate(acme)
#'
#' #Aggregate using Get
#' print(acme, "cost", minCost = acme$Get(Aggregate, "cost", min))
#'
#' #use Aggregate with a function:
#' Aggregate(acme, function(x) x$cost * x$p, sum)
#'
#' #cache values along the way
#' acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum), traversal = "post-order")
#' acme$IT$cost
#'
#' @seealso \code{\link{Node}}
#'
#' @export
Aggregate <- function(node, attribute, aggFun, ...) {
  if ("cacheAttribute" %in% names(list(...))) stop("cacheAttribute not supported anymore! Please use Do instead.")

  # A leaf has nothing to aggregate: its own attribute value is the result.
  if (isLeaf(node)) return(GetAttribute(node, attribute, ...))

  values <- sapply(node$children, function(x) {
    v <- GetAttribute(x, attribute, format = identity, ...)
    # Use the child's own value if it has a complete one ...
    if (length(v) > 0 && !any(is.na(v))) return(v)
    # ... otherwise derive it from the child's children.
    Aggregate(x, attribute, aggFun, ...)
  })
  unname(aggFun(values))
}

#' Cumulate values among siblings
#'
#' For example, you can sum up values of siblings before
#' this \code{Node}.
#'
#' @param node The node on which we want to cumulate
#'
#' @inheritParams Aggregate
#' @inheritParams Get
#'
#' @examples
#' data(acme)
#' acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum), traversal = "post-order")
#' acme$Do(function(x) x$cumCost <- Cumulate(x, "cost", sum))
#' print(acme, "cost", "cumCost")
#'
#' @export
Cumulate <- function(node, attribute, aggFun, ...) {
  if ("cacheAttribute" %in% names(list(...))) stop("cacheAttribute not supported anymore! Please use Do instead.")

  # The root has no siblings, so its own value is returned as is.
  # `...` is forwarded so that attribute functions/methods receive their
  # additional arguments, as documented for this parameter.
  if (isRoot(node)) return(GetAttribute(node, attribute, ...))

  # Collect this node and all siblings positioned before it.
  pos <- node$position
  nodes <- node$parent$children[1:pos]
  aggFun(Get(nodes, attribute, ...))
}

#' Clone a tree (creates a deep copy)
#'
#' The method also clones object attributes (such as the formatters), if desired.
#' If the method is called on a non-root, then the parent relationship is not cloned,
#' and the resulting \code{\link{Node}} will be a root.
#'
#' @param node the root node of the tree or sub-tree to clone
#' @param attributes if FALSE, then R class attributes (e.g. formatters and grViz styles)
#' are not cloned. This makes the method faster.
#' @return the clone of the tree or sub-tree
#'
#' @examples
#' data(acme)
#' acmeClone <- Clone(acme)
#' acmeClone$name <- "New Acme"
#' # acmeClone does not point to the same reference object anymore:
#' acme$name
#'
#' #cloning a subtree
#' data(acme)
#' itClone <- Clone(acme$IT)
#' itClone$isRoot
#'
#'
#' @inheritParams Prune
#'
#' @seealso SetFormat
#'
#' @export
Clone <- function(node, pruneFun = NULL, attributes = FALSE) {
  .Clone(node, pruneFun, attributes)
}

# Recursive worker behind Clone. `firstCall` is TRUE only for the node on
# which Clone was invoked; that clone gets its parent reset to NULL.
.Clone <- function(node, pruneFun = NULL, attributes = FALSE, firstCall = TRUE) {
  myclone <- node$clone()
  if (attributes) attributes(myclone) <- attributes(node)

  if (!is.null(pruneFun) && length(node$children) > 0) {
    keep <- sapply(node$children, pruneFun)
    children <- node$children[keep]
    # drop the bindings of pruned children from the cloned node
    rm(list = names(node$children)[!keep], envir = myclone)
  } else {
    children <- node$children
  }

  myclone$children <- lapply(children, function(x) .Clone(x, pruneFun, attributes, firstCall = FALSE))
  # re-wire the cloned children: bind them by name and point them to the new parent
  for (child in myclone$children) {
    myclone[[child$name]] <- child
    child$parent <- myclone
  }
  if (length(myclone$children) == 0) myclone$children <- NULL
  if (firstCall) myclone$parent <- NULL
  #myclone$RemoveAttribute("parent", stopIfNotAvailable = FALSE)
  return(myclone)
}

#' Navigate to another node
by relative path.
#'
#' @usage Navigate(node, path)
#'
#' @param node The starting \code{\link{Node}} to navigate
#' @param path A string or a character vector describing the path to navigate
#'
#' @details The \code{path} is always relative to the \code{node}. Navigation
#' to the parent is defined by \code{..}, whereas navigation to a child
#' is defined via the child's name.
#' If path is provided as a string, then the navigation steps are separated
#' by '/'.
#'
#' @examples
#' data(acme)
#' Navigate(acme$Research, "../IT/Outsource")
#' Navigate(acme$Research, c("..", "IT", "Outsource"))
#'
#' @seealso \code{\link{Climb}}
#'
#' @export
Navigate <- function(node, path) {
  # A single string is split into its '/'-separated steps; a vector of
  # any other length is taken as the list of steps directly.
  steps <- if (length(path) == 1) strsplit(path, "/", fixed = TRUE)[[1]] else path

  current <- node
  for (step in steps) {
    if (identical("..", step)) {
      current <- current$parent
    } else if (!identical(".", step)) {
      # anything but "." (stay where we are) is looked up as a child name
      current <- current[[step]]
    }
  }
  return(current)
}

#' Climb a tree from parent to children, by provided criteria.
#'
#'
#' This method lets you climb the tree, from crutch to crutch. On each \code{Node}, the
#' \code{Climb} finds the first child having attribute value equal to the provided argument.
#'
#' @usage #node$Climb(...)
#' Climb(node, ...)
#'
#'
#' @param node The root \code{\link{Node}} of the tree or subtree to climb
#' @param ... an attribute-value pairlist to be searched. For brevity, you can also provide a character vector to search for names.
#' @return the \code{Node} having path \code{...}, or \code{NULL} if such a path does not exist
#'
#' @examples
#' data(acme)
#'
#' #the following are all equivalent
#' Climb(acme, 'IT', 'Outsource')
#' Climb(acme, name = 'IT', name = 'Outsource')
#' Climb(acme, 'IT')$Climb('Outsource')
#' Navigate(acme, path = "IT/Outsource")
#'
#' Climb(acme, name = 'IT')
#'
#' Climb(acme, position = c(2, 1))
#' #or, equivalent:
#' Climb(acme, position = 2, position = 1)
#' Climb(acme, name = "IT", cost = 250000)
#'
#' tree <- CreateRegularTree(5, 2)
#' tree$Climb(c("1", "1"), position = c(2, 2))$path
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Navigate}}
#'
#' @export
Climb <- function(node, ...) {
  path <- list(...)
  if (length(path) == 0) return(node)

  #convert args to standard
  #e.g. id = (3, 5), name = "myname"
  #to
  # id = 3, id = 5, name = "myname"
  # path <- list(id = c(3, 5), "myname", c("bla", "blo"))
  # path <- list(id = 3, id = 5, name = "myname")
  # path <- c("IT")
  mpath <- NULL
  for (i in seq_along(path)) names(path[[i]]) <- rep(names(path)[i], length(path[[i]]))
  for (i in seq_along(path)) mpath <- c(mpath, as.list(path[[i]]))

  # The first step decides which child to descend into; an unnamed step
  # is a search by name.
  attribute <- names(mpath)[[1]]
  if (length(attribute) == 0 || is.na(attribute) || nchar(attribute) == 0) attribute <- "name"
  value <- mpath[[1]]

  if (attribute == "name") {
    child <- node[[value]]
  } else {
    getA <- Get(node$children, attribute)
    # which() ignores NA comparisons (children lacking the attribute) and
    # yields an empty index when nothing matches, so a missing path gives
    # NULL instead of a "subscript out of bounds" error.
    hits <- which(getA == value)
    child <- if (length(hits) == 0) NULL else node$children[[hits[[1]]]]
  }

  if (is.null(child)) {
    return(NULL)
  } else if (length(mpath) == 1) {
    return(child)
  }
  # continue with the remaining steps from the child found
  return(do.call(Climb, c(node = child, mpath[-1])))
}

#' Find a node by name in the (sub-)tree
#'
#' Scans the entire sub-tree spanned by \code{node} and returns the first \code{\link{Node}}
#' having the \code{name} specified. This is mainly useful for trees whose name is unique.
#' If \code{\link{AreNamesUnique}} is \code{FALSE}, i.e. if there is more than one \code{Node}
#' called \code{name} in the tree, then it is undefined which one will be returned.
#' Also note that this method is not particularly fast. See examples for a faster way to
#' index large trees, if you need to do multiple searches. See \code{\link{Traverse}} if
#' you need to find multiple \code{Nodes}.
#'
#' @param node The root \code{Node} of the tree or sub-tree to search
#' @param name The name of the \code{Node} to be returned
#'
#' @return The first \code{Node} whose name matches, or \code{NULL} if no such \code{Node} is
#' found.
#'
#' @examples
#' data(acme)
#' FindNode(acme, "Outsource")
#'
#' #re-usable hashed index for multiple searches:
#' if(!AreNamesUnique(acme)) stop("Hashed index works for unique names only!")
#' trav <- Traverse(acme, "level")
#' names(trav) <- Get(trav, "name")
#' nameIndex <- as.environment(trav)
#' #you could also use hash from package hash instead!
#' #nameIndex <- hash(trav)
#' nameIndex$Outsource
#' nameIndex$IT
#'
#'
#' @seealso AreNamesUnique, Traverse
#'
#' @export
FindNode <- function(node, name) {
  # keep only the nodes carrying the requested name, in traversal order
  matches <- Traverse(node, filterFun = function(x) x$name == name)
  if (length(matches) > 0) matches[[1]] else NULL
}

#' Find the distance between two nodes of the same tree
#'
#' The distance is measured as the number of edges that
#' need to be traversed to reach node2 when starting
#' from node1.
#'
#' @param node1 the first node in the tree
#' @param node2 the second node in the same tree
#'
#' @return the number of edges between \code{node1} and \code{node2}
#'
#' @examples
#' data(acme)
#' Distance(FindNode(acme, "Outsource"), FindNode(acme, "Research"))
#'
#' @export
Distance <- function(node1, node2) {
  if (!identical(node1$root, node2$root)) stop("node1 and node2 must be in same tree!")

  steps1 <- node1$path
  steps2 <- node2$path
  limit <- min(node1$level, node2$level)

  # count how many leading path elements both nodes have in common
  common <- 0
  while (common < limit && steps1[common + 1] == steps2[common + 1]) common <- common + 1

  # every path element that is not shared contributes one edge
  length(steps1) + length(steps2) - 2 * common
}

#' Get an attribute from a Node.
#'
#' @param node The \code{\link{Node}} from which the \code{attribute} should be fetched.
#' @param nullAsNa If TRUE (the default), then NULL is returned as NA. Otherwise it is returned as NULL.
#'
#'
#' @inheritParams Get
#'
#' @examples
#' data(acme)
#' GetAttribute(acme$IT$Outsource, "cost")
#'
#' @export
GetAttribute <- function(node, attribute, ..., format = FALSE, inheritFromAncestors = FALSE, nullAsNa = TRUE) {
  # for backwards compatibility: a NULL format is treated like TRUE
  if (is.null(format)) format <- TRUE

  if (is.function(attribute)) {
    # function: called with the node as its first argument
    v <- attribute(node, ...)
  } else if (is.character(attribute) && length(attribute) == 1) {
    # property
    v <- node[[attribute]]
    if (is.function(v)) {
      if (is.null(formals(v))) {
        v <- v()
      } else if (names(formals(v))[[1]] == "self") {
        # allow storing functions whose first arg is self
        v <- v(self = node, ...)
      } else {
        v <- v(...)
      }
    }
  } else {
    stop("attribute must be a function, the name of a public property, or the name of method")
  }

  if (is.null(v) && inheritFromAncestors && !isRoot(node)) {
    # Fetch the raw value from the ancestors. Formatting is applied exactly
    # once, below; passing `format` on here would format the inherited value
    # again at every level on the way back.
    v <- GetAttribute(node$parent, attribute, ..., format = FALSE, inheritFromAncestors = TRUE, nullAsNa = FALSE)
  }

  if (is.null(v)) {
    if (!nullAsNa) return(NULL)
    v <- NA
  }

  if (isTRUE(format) && !is.function(attribute)) {
    # get default formatter (set on the node or on one of its ancestors)
    format <- GetObjectAttribute(node, "formatters")[[attribute]]
  }
  if (is.function(format)) v <- format(v)
  return(v)
}

# Looks up the R attribute `name` on `node`; if it is empty there, the
# lookup continues on the parent, up to the node that has no parent.
GetObjectAttribute <- function(node, name) {
  a <- attr(node, name)
  ##try to speed up by avoiding isRoot call
  prnt <- node$parent
  if (length(a) > 0 || is.null(prnt)) return(a)
  return(GetObjectAttribute(prnt, name))
}

#' Set a formatter function on a specific node
#'
#' Formatter functions set on a Node act as a default formatter when printing and using
#' the \code{\link{Get}} method. The formatter is inherited, meaning that whenever
#' \code{Get} fetches an attribute from a \code{Node}, it checks on the \code{Node} or
#' on any of its ancestors whether a formatter is set.
#'
#' @param node The node on which to set the formatter
#' @param name The attribute name for which to set the formatter
#' @param formatFun The formatter, i.e. a function taking a value as an input, and
#' returning the formatted value
#'
#' @examples
#' data(acme)
#' acme$Set(id = 1:(acme$totalCount))
#' SetFormat(acme, "id", function(x) FormatPercent(x, digits = 0))
#' SetFormat(Climb(acme, "IT"), "id", FormatFixedDecimal)
#' print(acme, "id")
#' # Calling Get with an explicit formatter will overwrite the default set on the Node:
#' print(acme, id = acme$Get("id", format = function(x) paste0("id:", x)))
#'
#' # Or, to avoid formatters, even though you set them on a Node:
#' print(acme, id = acme$Get("id", format = identity))
#'
#'
#' @seealso Get
#' @seealso print.Node
#'
#' @export
SetFormat <- function(node, name, formatFun) {
  # Formatters live in the "formatters" R attribute of the node,
  # keyed by attribute name.
  formatters <- attr(node, "formatters")
  if (length(formatters) == 0) formatters <- list()
  formatters[[name]] <- formatFun
  attr(node, "formatters") <- formatters
  invisible(formatFun)
}

#' Test whether all node names are unique.
#'
#' This can be useful for some conversions.
#' @param node The root \code{Node} of the \code{data.tree} structure to test
#'
#' @return \code{TRUE} if no two nodes in the tree have the same \code{name},
#' \code{FALSE} otherwise
#'
#' @examples
#' data(acme)
#' AreNamesUnique(acme)
#' acme$name <- "IT"
#' AreNamesUnique(acme)
#'
#' @seealso as.igraph.Node
#' @export
AreNamesUnique <- function(node) {
  allNames <- node$Get("name")
  !any(duplicated(allNames))
}

================================================
FILE: R/node_methods_sideeffect.R
================================================
#
# These are methods on Node which have side effects, meaning they
# change a Node object or any of its descendants. To keep the
# memory footprint of the Node object small, and to be able to
# document them, they are implemented in traditional R style,
# and their OO part is only a wrapper around the methods here.
#
# Requirements for side effect methods
# 1. they are implemented here
# 2.
their OO part in Node.R is a wrapper
# 3. the Node documentation links to here
# 4. the methods here are not exported
# 5. the methods here are marked as internal, so as to have roxygen generate documentation
#

#' Sort children of a \code{Node} or an entire \code{data.tree} structure
#'
#' You can sort with respect to any argument of the tree. But note that sorting has
#' side-effects, meaning that you modify the underlying, original data.tree object structure.
#'
#' @usage Sort(node, attribute, ..., decreasing = FALSE, recursive = TRUE)
#'
#' @param node The node whose children are to be sorted
#' @param ... any parameters to be passed on to the attribute (in case it's a method or a
#' function)
#' @param decreasing sort order
#' @param recursive if \code{TRUE}, Sort will be called recursively on the \code{Node}'s children.
#' This allows sorting an entire tree.
#'
#' @inheritParams Get
#'
#' @return Returns the node on which Sort is called, invisibly. This can be useful to chain Node methods.
#'
#' @examples
#' data(acme)
#' acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order")
#' Sort(acme, "totalCost", decreasing = FALSE)
#' print(acme, "totalCost")
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Revert}}
#' @export
Sort <- function(node, attribute, ..., decreasing = FALSE, recursive = TRUE) {
  # Nothing to sort on a leaf; still return the node invisibly, as
  # documented, so that chained calls keep working.
  if (node$isLeaf) return(invisible(node))

  # sort key of each child, labelled with the child's name
  ChildL <- sapply(node$children, function(x) GetAttribute(x, attribute, ...))
  names(ChildL) <- names(node$children)

  # re-order the children by their key; children with an NA key go last
  node$children <- node$children[names(sort(ChildL, decreasing = decreasing, na.last = TRUE))]

  if (recursive) {
    for (child in node$children) {
      Sort(child, attribute, ..., decreasing = decreasing, recursive = recursive)
    }
  }
  invisible(node)
}

#' Reverts the sort order of a \code{Node}'s children.
#'
#' @usage Revert(node, recursive = TRUE)
#'
#' @param node the Node whose childrens' sort order is to be reverted
#' @param recursive If \code{TRUE}, then revert is called recursively on
#' all children.
#'
#' @return returns the Node invisibly (for chaining)
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Sort}}
#' @export
Revert <- function(node, recursive = TRUE) {
  # Without recursion only the node and its direct children take part.
  pf <- function(x) {
    if (recursive) return(TRUE)
    x$level <= (node$level + 1)
  }
  trav <- Traverse(node, pruneFun = pf)

  # Number the traversed nodes in their current order. The numbering must
  # have exactly one value per traversed node: a longer vector would make
  # Set recycle the nodes and overwrite the numbers out of order.
  Set(trav, .tmp = seq_along(trav))
  # Sorting decreasingly by that number reverts the current order.
  Sort(node, ".tmp", decreasing = TRUE, recursive = recursive)
  # remove the temporary numbering again
  Do(trav, function(x) rm(".tmp", envir = x))
  invisible(node)
}

#' Prunes a tree.
#'
#' Pruning refers to removing entire subtrees. This function has side-effects, it modifies your data.tree structure!
#'
#' @usage Prune(node, pruneFun)
#'
#' @param node The root of the sub-tree to be pruned
#' @param pruneFun allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}.
#' If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.
#' @return the number of nodes removed
#'
#' @examples
#' data(acme)
#' acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum))
#' Prune(acme, function(x) x$cost > 700000)
#' print(acme, "cost")
#'
#' @seealso \code{\link{Node}}
#'
#' @export
Prune <- function(node, pruneFun) {
  return(.Prune(node, pruneFun, TRUE))
}

# Recursive worker behind Prune. Only the first call (isFirstCall = TRUE)
# reports the number of removed nodes, computed from totalCount before
# and after pruning.
.Prune <- function(node, pruneFun, isFirstCall = FALSE) {
  if (isFirstCall) cnt <- node$totalCount
  if (node$isLeaf) return(0)

  # Walk the children back to front so that removing one does not shift
  # the positions of those still to be visited.
  for (i in rev(seq_along(node$children))) {
    child <- node$children[[i]]
    # evaluate the user's criterion once per child
    keep <- pruneFun(child)
    if (length(keep) == 0) {
      stop(paste("pruneFun evaluated on node",
                 child$name,
                 "evaluated to logical(0).",
                 "Perhaps you should read nullAsNa in GetAttribute's help."
      ))
    } else if (!keep) {
      # drop both the name binding on the node and the entry in children
      rm(list = names(node$children)[i], envir = node)
      node$children <- node$children[-i]
    }
  }

  for (child in node$children) {
    .Prune(child, pruneFun)
  }
  if (isFirstCall) return(cnt - node$totalCount)
}

================================================
FILE: R/node_methods_traversal.R
================================================
#' Traverse a tree or a sub-tree
#'
#' Traverse takes the root of a tree or a sub-tree, and "walks" the tree in a specific order. It returns a list of
#' \code{\link{Node}} objects, filtered and pruned by \code{filterFun} and \code{pruneFun}.
#'
#' @param node the root of a tree or a sub-tree that should be traversed
#' @param traversal any of 'pre-order' (the default), 'post-order', 'in-order', 'level', 'ancestor', or a custom function (see details)
#' @param filterFun allows providing a filter, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}.
#' Note that if filter returns \code{FALSE}, then the node will be excluded from the result (but not the entire subtree).
#'
#' @return a list of \code{Node}s
#'
#' @details
#' The traversal order is as follows. (Note that these descriptions are not precise and complete. They are meant
#' for quick reference only. See the data.tree vignette for a more detailed description).
#' \describe{
#'    \item{pre-order}{Go to first child, then to its first child, etc.}
#'    \item{post-order}{Go to the first branch's leaf, then to its siblings, and work your way back to the root}
#'    \item{in-order}{Go to the first branch's leaf, then to its parent, and only then to the leaf's sibling}
#'    \item{level}{Collect root, then level 2, then level 3, etc.}
#'    \item{ancestor}{Take a node, then the node's parent, then that node's parent in turn, etc. This ignores the \code{pruneFun} }
#'    \item{function}{You can also provide a function, whose sole parameter is a \code{\link{Node}} object.
The
#' function is expected to return the node's next node, a list of the node's next nodes, or NULL.}
#' }
#'
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Get}}
#' @seealso \code{\link{Set}}
#' @seealso \code{\link{Do}}
#'
#' @inheritParams Prune
#'
#' @export
Traverse = function(node,
                    traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"),
                    pruneFun = NULL,
                    filterFun = NULL) {
  #traverses in various orders. See http://en.wikipedia.org/wiki/Tree_traversal

  nodes <- list()

  # the default is the whole vector of choices: use its first entry
  if(length(traversal) > 1L) {
    traversal <- traversal[1L]
  }
  if(is.function(traversal) || traversal == "pre-order" || traversal == "post-order") {

    # a node rejected by pruneFun contributes neither itself nor its sub-tree
    if (length(pruneFun) == 0 || pruneFun(node)) {

      if (is.function(traversal)) {
        # custom traversal: the function yields the next node(s) to visit;
        # a single Node or NULL is normalised to a list
        children <- traversal(node)
        if (is(children, "Node")) children <- list(children)
        if (is.null(children)) children <- list()
      } else children <- node$children

      for(child in children) {
        nodes <- c(nodes, Traverse(child, traversal = traversal, pruneFun = pruneFun, filterFun = filterFun))
      }
      # filterFun only decides whether the node itself is part of the
      # result; its descendants have been collected above regardless
      if(length(filterFun) == 0 || filterFun(node)) {
        # post-order appends the node after its descendants, the other
        # modes handled here put it in front
        if(is.function(traversal) || traversal == "pre-order") nodes <- c(node, nodes)
        else nodes <- c(nodes, node)
      }
    }

  } else if(traversal == "in-order") {
    if(!node$isBinary) stop("traversal in-order valid only for binary trees")
    if(length(pruneFun) == 0 || pruneFun(node)) {
      if(!node$isLeaf) {
        # first child, then the node itself (if not filtered out), then second child
        n1 <- Traverse(node$children[[1]], traversal = traversal, pruneFun = pruneFun, filterFun = filterFun)
        if(length(filterFun) == 0 || filterFun(node)) n2 <- node
        else n2 <- list()
        n3 <- Traverse(node$children[[2]], traversal = traversal, pruneFun = pruneFun, filterFun = filterFun)
        nodes <- c(n1, n2, n3)
      } else {
        if(length(filterFun) == 0 || filterFun(node)) n2 <- node
        else n2 <- list()
        nodes <- c(nodes, n2)
      }
    }

  } else if (traversal == "ancestor") {
    # collect the parent chain first; pruneFun is passed on to the
    # recursive call but never evaluated in this branch

    if (!isRoot(node)) {
      nodes <- Traverse(node$parent, traversal = traversal, pruneFun = pruneFun, filterFun = filterFun)
    }

    if(length(filterFun) == 0 || filterFun(node)) {
      nodes <- c(node, nodes)
    }

  } else if (traversal == "level") {

    # traverse with the default traversal, then re-order the result by level
    nodes <- Traverse(node, filterFun = filterFun, pruneFun = pruneFun)
    if (length(nodes) > 0) nodes <- nodes[order(Get(nodes, function(x) x$level))]

  } else {
    stop("traversal must be pre-order, post-order, in-order, ancestor, or level")
  }
  return (nodes)
}

#' Traverse a Tree and Collect Values
#'
#' The \code{Get} method is one of the most important ones of the \code{data.tree} package. It lets you traverse a tree
#' and collect values along the way. Alternatively, you can call a method or a function on each \code{\link{Node}}.
#'
#' @usage
#' # OO-style:
#' #node$Get(attribute,
#' #        ...,
#' #        traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"),
#' #        pruneFun = NULL,
#' #        filterFun = NULL,
#' #        format = FALSE,
#' #        inheritFromAncestors = FALSE)
#'
#' # traditional:
#' Get(nodes,
#'     attribute,
#'     ...,
#'     format = FALSE,
#'     inheritFromAncestors = FALSE,
#'     simplify = c(TRUE, FALSE, "array", "regular"))
#'
#'
#' @param nodes The nodes on which to perform the Get (typically obtained via \code{\link{Traverse}})
#' @param attribute determines what is collected. The \code{attribute} can be
#'       \itemize{
#'         \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")}
#'         \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1}
#'         \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)}
#'        }
#' @param ... in case the \code{attribute} is a function or a method, the ellipsis is passed to it as additional arguments.
#' @param format if \code{FALSE} (the default), no formatting is being used. If \code{TRUE}, then the first formatter (if any) found along the ancestor path is being used for formatting
#' (see \code{\link{SetFormat}}).
If \code{format} is a function, then the collected value is passed to that function, and the result is returned.
#' @param inheritFromAncestors if \code{TRUE}, then the path above a \code{Node} is searched to get the \code{attribute} in case it is NULL.
#' @param simplify same as \code{\link{sapply}}, i.e. TRUE, FALSE or "array". Additionally, you can specify "regular" if
#' each returned value is of length > 1, and equally named. See below for an example.
#'
#' @return a vector containing the \code{attributes} collected during traversal, in traversal order. \code{NULL} is converted
#' to NA, such that \code{length(Node$Get) == Node$totalCount}
#'
#'
#' @examples
#' data(acme)
#' acme$Get("level")
#' acme$Get("totalCount")
#'
#'
#' acme$Get(function(node) node$cost * node$p,
#'          filterFun = isLeaf)
#'
#' #This is equivalent:
#' nodes <- Traverse(acme, filterFun = isLeaf)
#' Get(nodes, function(node) node$cost * node$p)
#'
#'
#' #simplify = "regular" will preserve names
#' acme$Get(function(x) c(position = x$position, level = x$level), simplify = "regular")
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Set}}
#' @seealso \code{\link{Do}}
#' @seealso \code{\link{Traverse}}
#'
#' @import methods
#'
#' @export
Get <- function(nodes,
                attribute,
                ...,
                format = FALSE,
                inheritFromAncestors = FALSE,
                simplify = c(TRUE, FALSE, "array", "regular")) {
  if (length(nodes) == 0) return(NULL)
  if (!is(nodes, "list")) stop("nodes must be a list of Node objects!")

  simplify <- simplify[1]
  # result names are derived from the node names below, not from the list names
  nodes <- unname(nodes)

  # "regular" collects without simplification and column-binds at the end
  if (simplify == "regular") {
    regular <- TRUE
    simplify <- FALSE
  } else {
    regular <- FALSE
  }

  res <- sapply(nodes,
                function(x) GetAttribute(x,
                                         attribute,
                                         ...,
                                         format = format,
                                         inheritFromAncestors = inheritFromAncestors),
                simplify = simplify
  )

  # label the result with the node names
  if (is.character(attribute) && attribute == "name") {
    names(res) <- res
  } else {
    if (is.null(dim(res))) {
      names(res) <- Get(nodes, "name")
    } else {
      # array result: the last dimension runs over the nodes
      if (is.null(dimnames(res))) dimnames(res) <- list()
      dimnames(res)[[length(dim(res))]] <- Get(nodes, "name")
    }
  }

  if (regular) {
    res <- do.call(cbind, res)
  }

  return(res)
}

#' Executes a function on a set of nodes
#'
#' @usage
#' # OO-style:
#' # node$Do(fun,
#' #         ...,
#' #         traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"),
#' #         pruneFun = NULL,
#' #         filterFun = NULL)
#'
#' # traditional:
#' Do(nodes, fun, ...)
#'
#' @param fun the function to execute. The function is expected to be either a Method, or to take a
#' Node as its first argument
#' @param ... any additional parameters to be passed on to fun
#'
#' @return invisibly returns the nodes
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Get}}
#' @seealso \code{\link{Set}}
#' @seealso \code{\link{Traverse}}
#'
#' @inheritParams Get
#'
#' @examples
#' data(acme)
#' traversal <- Traverse(acme)
#' Do(traversal, function(node) node$expectedCost <- node$p * node$cost)
#' print(acme, "expectedCost")
#'
#' @export
Do <- function(nodes, fun, ...) {
  # Nothing to do for an empty set of nodes (including NULL, e.g. the
  # children of a leaf): return right away instead of running into the
  # list check below.
  if (length(nodes) == 0) return(invisible(nodes))
  if (!is(nodes, "list")) stop("nodes must be a list of Node objects!")

  for (node in nodes) fun(node, ...)

  invisible(nodes)
}

#' Traverse a Tree and Assign Values
#'
#' The method takes one or more vectors as an argument. It traverses the tree, whereby the values are picked
#' from the vector. Also available as OO-style method on \code{\link{Node}}.
#'
#' @usage
#' #OO-style:
#' # node$Set(...,
#' #          traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"),
#' #          pruneFun = NULL,
#' #          filterFun = NULL)
#' #traditional:
#' Set(nodes, ...)
#'
#'
#' @param ... each argument can be a vector of values to be assigned. Recycled.
#'
#' @return invisibly returns the nodes (useful for chaining)
#'
#' @examples
#' data(acme)
#' acme$Set(departmentId = 1:acme$totalCount, openingHours = NULL, traversal = "post-order")
#' acme$Set(head = c("Jack Brown",
#'                   "Mona Moneyhead",
#'                   "Dr. Frank N.
Stein",
#'                   "Eric Nerdahl"
#'                   ),
#'          filterFun = function(x) !x$isLeaf
#'         )
#' print(acme, "departmentId", "head")
#'
#' @seealso \code{\link{Node}}
#' @seealso \code{\link{Get}}
#' @seealso \code{\link{Do}}
#' @seealso \code{\link{Traverse}}
#'
#' @inheritParams Get
#'
#' @export
Set <- function(nodes, ...) {
  if (length(nodes) == 0) return(nodes)
  if (!is(nodes, "list")) stop("nodes must be a list of Node objects!")

  args <- list(...)
  # Unnamed arguments are named after the expression that was passed,
  # e.g. Set(nodes, cost) assigns to the field "cost".
  argsnames <- sapply(substitute(list(...))[-1], deparse)
  gargsnames <- names(args)
  if (is.null(gargsnames)) gargsnames <- vector(mode = "character", length = length(args))
  gargsnames[nchar(gargsnames) == 0] <- argsnames[nchar(gargsnames) == 0]
  names(args) <- gargsnames

  appFun <- function(x, arg, name) {
    x[[name]] <- arg
  }

  for (nme in names(args)) {
    arg <- args[[nme]]
    # an empty value (e.g. NULL) is assigned as NULL to every node
    if (length(arg) == 0) arg <- vector("list", 1)
    # nodes and values are paired element-wise
    mapply(appFun, nodes, arg, nme)
  }

  invisible(nodes)
}

================================================
FILE: R/node_plot.R
================================================
#' @rdname ToDiagrammeRGraph
#'
#' @param x The root node of the data.tree structure to plot
#' @inheritParams ToDataFrameNetwork
#' @inheritParams DiagrammeR::render_graph
#'
#' @export
plot.Node <- function(x, ..., direction = c("climb", "descend"), pruneFun = NULL, output = "graph") {
  # DiagrammeR is an optional dependency, so check for it at call time.
  if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
    stop(
      "Package \"DiagrammeR\" is required to plot a `data.tree::Node` ",
      "object. Please install it."
    )
  }

  graph <- ToDiagrammeRGraph(x, direction, pruneFun)
  DiagrammeR::render_graph(graph, output = output, ...)
}

#' Plot a graph, or get a graphviz dot representation of the tree
#'
#' Use these methods to style your graph, and to plot it. The functionality is built around the
#' DiagrammeR package, so for anything that goes beyond simple plotting, it is recommended to read its
#' documentation at http://rich-iannone.github.io/DiagrammeR/docs.html.
Note that DiagrammeR is only suggested #' by data.tree, so `plot` only works if you have installed it on your system. #' #' Use \code{SetNodeStyle} and \code{SetEdgeStyle} to define the style of your plot. Use \code{plot} to display a #' graphical representation of your tree. #' #' The most common styles that can be set on the nodes are: #' \itemize{ #' \item{\code{color}} #' \item{\code{fillcolor}} #' \item{\code{fixedsize} true or false} #' \item{\code{fontcolor}} #' \item{\code{fontname}} #' \item{\code{fontsize}} #' \item{\code{height}} #' \item{\code{penwidth}} #' \item{\code{shape} box, ellipse, polygon, circle, box, etc.} #' \item{\code{style}} #' \item{\code{tooltip}} #' \item{\code{width}} #' } #' The most common styles that can be set on the edges are: #' \itemize{ #' \item{\code{arrowhead} e.g. normal, dot, vee} #' \item{\code{arrowsize}} #' \item{\code{arrowtail}} #' \item{\code{color}} #' \item{\code{dir} forward, back, both, none} #' \item{\code{fontcolor}} #' \item{\code{fontname}} #' \item{\code{fontsize}} #' \item{\code{headport}} #' \item{\code{label}} #' \item{\code{minlen}} #' \item{\code{penwidth}} #' \item{\code{tailport}} #' \item{\code{tooltip}} #' } #' A good source to understand the attributes is http://graphviz.org/Documentation.php. Another good source #' is the DiagrammeR package documentation, or more specifically: http://rich-iannone.github.io/DiagrammeR/docs.html #' #' In addition to the standard GraphViz functionality, the \code{data.tree} plotting infrastructure takes advantage #' of the fact that data.tree structure are always hierarchic. Thus, style attributes are inherited from parents #' to children on an individual basis. For example, you can set the fontcolor to red on a parent, and then all children #' will also have red font, except if you specifically disallow inheritance. Labels and tooltips are never inherited. #' #' Another feature concerns functions: Instead of setting a fixed value (e.g. 
\code{SetNodeStyle(acme, label = "Acme. Inc")}),
#' you can set a function (e.g. \code{SetNodeStyle(acme, label = function(x) x$name)}). The function must take a \code{\link{Node}}
#' as its single argument. Together with inheritance, this becomes a very powerful tool.
#'
#' The \code{GetDefaultTooltip} method is a utility method that can be used to print all attributes of a \code{\link{Node}}.
#'
#' There are some more examples in the 'applications' vignette, see \code{vignette('applications', package = "data.tree")}
#'
#' @param root The root \code{\link{Node}} of the data.tree structure to visualize.
#' @param node The \code{\link{Node}} of the data.tree structure on which you would like to set style attributes.
#' @param ... For the SetStyle methods, this can be any styleName / value pair. See
#' http://graphviz.org/Documentation.php for details. For the plot.Node generic method, this is not used.
#'
#' @inheritParams Prune
#'
#' @examples
#' data(acme)
#' SetGraphStyle(acme, rankdir = "TB")
#' SetEdgeStyle(acme, arrowhead = "vee", color = "blue", penwidth = 2)
#' #per default, Node style attributes will be inherited:
#' SetNodeStyle(acme, style = "filled,rounded", shape = "box", fillcolor = "GreenYellow",
#'              fontname = "helvetica", tooltip = GetDefaultTooltip)
#' SetNodeStyle(acme$IT, fillcolor = "LightBlue", penwidth = "5px")
#' #inheritance can be avoided:
#' SetNodeStyle(acme$Accounting, inherit = FALSE, fillcolor = "Thistle",
#'              fontcolor = "Firebrick", tooltip = "This is the accounting department")
#' SetEdgeStyle(acme$Research$`New Labs`,
#'              color = "red",
#'              label = "Focus!",
#'              penwidth = 3,
#'              fontcolor = "red")
#' #use Do to set style on specific nodes:
#' Do(acme$leaves, function(node) SetNodeStyle(node, shape = "egg"))
#' plot(acme)
#'
#' #print p as label, where available:
#' SetNodeStyle(acme, label = function(node) node$p)
#' plot(acme)
#'
#' @export
ToDiagrammeRGraph <- function(root, direction = c("climb", "descend"), pruneFun = NULL) {
  # DiagrammeR is an optional dependency, so check for it at call time.
  if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
    stop(
      "Package \"DiagrammeR\" is required to convert a `data.tree::Node` ",
      "to a DiagrammeR graph. Please install it."
    )
  }

  #get unique node styles defined on tree
  ns <- unique(unlist(sapply(root$Get(function(x) attr(x, "nodeStyle"), simplify = FALSE), names)))

  # set tmp .id
  tr <- Traverse(root, pruneFun = pruneFun)
  Set(tr, `.id` = seq_along(tr))

  #create nodes df
  myargs <- list()
  # every node needs a label, even if no label style was set anywhere
  if (!"label" %in% ns) ns <- c(ns, "label")
  for (style in ns) {
    myargs[[style]] <- Get(tr, function(x) {
      myns <- GetStyle(x, style, "node")
      # the node name is the fallback label
      if (style == "label" && length(myns) == 0) myns <- x$name
      #if (is.null(myns)) myns <- ""
      myns
    })
  }

  nodes <- do.call(DiagrammeR::create_node_df, c(n = length(tr), myargs))

  ## escape quotes in names to avoid problems with the gviz
  nodes$label <- gsub("\"", "\\\\\"", nodes$label)

  # get unique edge styles
  es <- unique(unlist(sapply(root$Get(function(x) attr(x, "edgeStyle"), simplify = FALSE), names)))

  myargs <- list()
  #see http://stackoverflow.com/questions/19749923/function-factory-in-r
  for (style in es) {
    myargs[[style]] <- GetEdgeStyleFactory(style)
  }

  # the first two columns of the network data.frame are dropped
  edges <- do.call("ToDataFrameNetwork", c(root,
                                           from = function(node) node$parent$`.id`,
                                           to = ".id",
                                           myargs,
                                           direction = list(direction),
                                           pruneFun = pruneFun))[, -(1:2)]

  if (nrow(edges) > 0) {
    edges <- do.call(DiagrammeR::create_edge_df, as.list(edges))
  }

  graph <- DiagrammeR::create_graph(nodes, edges, attr_theme = NULL)

  # global attributes
  # (we'd prefer to set the default on the root as graphAttributes, but
  # due to a DiagrammeR bug/feature this is not possible). So instead
  # repeating styles redundantly
  graphAttributes <- attr(root, "graphStyle")
  #if (is.null(graphAttributes)) graphAttributes <- ""
  #nodeAttributes <- GetDefaultStyles(root, type = "node")
  #edgeAttributes <- GetDefaultStyles(root, type = "edge")
  nodeAttributes <- NULL
  edgeAttributes <- NULL

  #graph <- set_global_graph_attrs(graph, "layout", "dot", "graph")
  graph <- DiagrammeR::add_global_graph_attrs(
    graph,
    attr = c(names(graphAttributes), names(nodeAttributes), names(edgeAttributes)),
    value = c(graphAttributes, nodeAttributes, edgeAttributes),
    attr_type = c(rep('graph', length(graphAttributes)),
                  rep('node', length(nodeAttributes)),
                  rep('edge', length(edgeAttributes)))
  )

  return(graph)
}

# Returns a function that looks up the edge style `style` for a node.
# `style` is forced so that each generated function keeps its own style
# name when the factory is called in a loop.
GetEdgeStyleFactory <- function(style) {
  style <- force(style)
  function(node = node, origNode = node) {
    myes <- GetStyle(node, style, "edge")
    #if (is.null(myes)) myes <- ""
    myes
  }
}

#' @param inherit If TRUE, then children will inherit this node's style.
#' Otherwise they inherit from this node's parent. Note that the inherit
#' always applies to the node, i.e. all style attributes of a node and not
#' to a single style attribute.
#'
#' @param keepExisting If TRUE, then style attributes are added to possibly
#' existing style attributes on the node.
#'
#' @rdname ToDiagrammeRGraph
#'
#' @export
SetNodeStyle <- function(node, inherit = TRUE, keepExisting = FALSE, ...) {
  SetStyle(node, "node", inherit, keepExisting, ...)
}

#' @rdname ToDiagrammeRGraph
#' @export
SetEdgeStyle <- function(node, inherit = TRUE, keepExisting = FALSE, ...) {
  SetStyle(node, "edge", inherit, keepExisting, ...)
}

# Stores the style list in the R attribute "<type>Style" of the node and
# the inherit flag in "<type>StyleInherit".
SetStyle <- function(node, type = c("node", "edge"), inherit = TRUE, keepExisting = FALSE, ...) {
  type <- type[1]
  an <- paste0(type, "Style")
  ain <- paste0(type, "StyleInherit")
  if (keepExisting) {
    # append the new styles to those already set
    ll <- attr(node, an)
    ll <- c(ll, list(...))
  } else {
    ll <- list(...)
  }
  attr(node, an) <- ll
  attr(node, ain) <- inherit
}

#' @rdname ToDiagrammeRGraph
#' @export
SetGraphStyle <- function(root, keepExisting = FALSE, ...) {
  if (keepExisting) {
    ll <- attr(root, "graphStyle")
    ll <- c(ll, list(...))
  } else {
    ll <- list(...)
  }
  attr(root, "graphStyle") <- ll
}

# Resolves the style `styleName` for `origNode`, starting at `node` and
# walking up the parents until a usable value is found. Style values that
# are functions are evaluated on `origNode`.
GetStyle <- function(node, styleName, type = c("node", "edge"), origNode = node) {
  type <- type[1]
  inh <- attr(node, paste0(type, "StyleInherit"))
  res <- attr(node, paste0(type, "Style"))[[styleName]]
  if (!is.null(res)) {
    if (!isRoot(node)) {
      if (identical(node, origNode) || (inh && !styleName %in% c("label", "tooltip"))) {
        # either on myself or inheritable
        if (is.function(res)) res <- res(origNode)
        return(res)
      }
    } else {
      #root
      if (is.function(res)) res <- res(origNode)
      return(res)
    }
  }
  #recursion exit criteria
  if (isRoot(node)) return(NULL)

  #recursion
  GetStyle(node$parent, styleName, type, origNode = origNode)
}

# Returns the inheritable, non-function styles set on the root of the
# tree (labels and tooltips excluded), or NULL if there are none.
GetDefaultStyles <- function(node, type = c("node", "edge")) {
  type <- type[1]
  node <- node$root
  inh <- attr(node, paste0(type, "StyleInherit"))
  res <- attr(node, paste0(type, "Style"))
  if (!is.null(res) && inh) {
    res <- res[!names(res) %in% c("label", "tooltip")]
    isFun <- sapply(res, is.function)
    res <- res[!isFun]
    if (length(res) == 0) return(NULL)
    #res <- paste(names(res), paste0("'", res, "'"), sep = " = ", collapse = ", ")
    return(res)
  } else {
    return(NULL)
  }
}

================================================
FILE: R/register-s3.R
================================================
#' Register a method for a suggested dependency
#'
#' Code copied into data.tree from `vctrs` (authors Wickham H, Henry L,
#' Vaughan D; https://github.com/r-lib/vctrs)
#'
#' Generally, the recommend way to register an S3 method is to use the
#' `S3Method()` namespace directive (often generated automatically be the
#' `@export` roxygen2 tag).
However, this technique requires that the generic
#' be in an imported package, and sometimes you want to suggest a package,
#' and only provide a method when that package is loaded. `s3_register()`
#' can be called from your package's `.onLoad()` to dynamically register
#' a method only if the generic's package is loaded. (To avoid taking a
#' dependency on vctrs for this one function, please feel free to copy
#' and paste the function source into your own package.)
#'
#' For R 3.5.0 and later, `s3_register()` is also useful when demonstrating
#' class creation in a vignette, since method lookup no longer always involves
#' the lexical scope. For R 3.6.0 and later, you can achieve a similar effect
#' by using "delayed method registration", i.e. placing the following in your
#' `NAMESPACE` file:
#'
#' ```
#' if (getRversion() >= "3.6.0") {
#'   S3method(package::generic, class)
#' }
#' ```
#'
#' @param generic Name of the generic in the form `pkg::generic`.
#' @param class Name of the class
#' @param method Optionally, the implementation of the method. By default,
#'   this will be found by looking for a function called `generic.class`
#'   in the package environment.
#'
#'   Note that providing `method` can be dangerous if you use
#'   devtools. When the namespace of the method is reloaded by
#'   `devtools::load_all()`, the function will keep inheriting from
#'   the old namespace. This might cause crashes because of dangling
#'   `.Call()` pointers.
#' @examples
#' # A typical use case is to dynamically register tibble/pillar methods
#' # for your class. That way you avoid creating a hard dependency on packages
#' # that are not essential, while still providing finer control over
#' # printing when they are used.
#'
#' .onLoad <- function(...) {
#'   s3_register("pillar::pillar_shaft", "vctrs_vctr")
#'   s3_register("tibble::type_sum", "vctrs_vctr")
#' }
#' @keywords internal
# nocov start
s3_register <- function(generic, class, method = NULL) {
  stopifnot(is.character(generic), length(generic) == 1)
  stopifnot(is.character(class), length(class) == 1)

  # split "pkg::generic" into its two parts
  pieces <- strsplit(generic, "::")[[1]]
  stopifnot(length(pieces) == 2)
  package <- pieces[[1]]
  generic <- pieces[[2]]

  caller <- parent.frame()

  # environment in which to look up `generic.class`: the caller's namespace
  # if it is called from one, otherwise the calling frame itself
  get_method_env <- function() {
    top <- topenv(caller)
    if (isNamespace(top)) {
      asNamespace(environmentName(top))
    } else {
      caller
    }
  }
  get_method <- function(method, env) {
    if (is.null(method)) {
      get(paste0(generic, ".", class), envir = get_method_env())
    } else {
      method
    }
  }

  method_fn <- get_method(method)
  stopifnot(is.function(method_fn))

  # Always register hook in case package is later unloaded & reloaded
  setHook(
    packageEvent(package, "onLoad"),
    function(...) {
      ns <- asNamespace(package)

      # Refresh the method, it might have been updated by `devtools::load_all()`
      method_fn <- get_method(method)

      registerS3method(generic, class, method_fn, envir = ns)
    }
  )

  # Avoid registration failures during loading (pkgload or regular)
  if (!isNamespaceLoaded(package)) {
    return(invisible())
  }

  envir <- asNamespace(package)

  # Only register if generic can be accessed
  if (exists(generic, envir)) {
    registerS3method(generic, class, method_fn, envir = envir)
  }

  invisible()
}
# nocov end

================================================
FILE: R/release.R
================================================
# Returns the package-specific checklist questions as a character vector.
# NOTE(review): presumably picked up by devtools::release() - confirm.
release_questions <- function() {
  c(
    "Have you set the date in DESCRIPTION?",
    "Have you updated NEWS?",
    "Have you verified that the application vignette looks ok?",
    "Have you verified that the data.tree vignette looks ok?",
    "Have you read the Node documentation ?Node",
    "Have you checked that all the reserved words are listed in Node?"
  )
}

================================================
FILE: R/util.R
================================================
#' Format a Number as a Percentage
#'
#' This utility method can be used as a format function when converting trees to a \code{data.frame}
#'
#' @param x A number (or a numeric vector)
#' @param digits The number of digits to print
#' @param format The format to use
#' @param ... Any other argument passed to formatC
#' @return A string corresponding to x, suitable for printing. \code{NA}
#' elements are rendered as \code{""}; \code{NULL} or zero-length input
#' yields \code{""}.
#'
#' @examples
#' data(acme)
#' print(acme, prob = acme$Get("p", format = FormatPercent))
#'
#' @seealso formatC
#' @export
FormatPercent <- function(x, digits = 2, format = "f", ...) {
  if (length(x) == 0) return ("")
  res <- rep("", length(x))
  ok <- !is.na(x)
  # format only the non-missing elements, so that NA input of any type
  # never reaches the arithmetic
  if (any(ok)) {
    res[ok] <- paste(formatC(100 * x[ok], format = format, digits = digits, ...), "%")
  }
  res
}


#' Format a Number as a Decimal
#'
#' Simple function that can be used as a format function when converting trees to a \code{data.frame}
#'
#' @param x a numeric scalar or vector
#' @param digits the number of digits to print after the decimal point
#' @return A string corresponding to x, suitable for printing. \code{NA}
#' elements are rendered as \code{""}; \code{NULL} or zero-length input
#' yields \code{""}.
#'
#' @examples
#' data(acme)
#' print(acme, prob = acme$Get("p", format = function(x) FormatFixedDecimal(x, 4)))
#'
#' @export
FormatFixedDecimal <- function(x, digits = 3) {
  if (length(x) == 0) return ("")
  res <- rep("", length(x))
  ok <- !is.na(x)
  if (any(ok)) {
    res[ok] <- sprintf(paste0("%.", digits, "f"), x[ok])
  }
  res
}


#' Calculates the height of a \code{Node} given the height of the root.
#'
#' This function puts leafs at the bottom (not hanging), and makes edges equally long.
#' Useful for easy plotting with third-party packages, e.g. if you have no specific height
#' attribute, e.g.
with \code{\link{as.dendrogram.Node}}, \code{\link{ToNewick}},
#' and \code{\link{as.phylo.Node}}
#'
#' @param node The node
#' @param rootHeight The height of the root
#'
#' @examples
#' data(acme)
#' dacme <- as.dendrogram(acme, heightAttribute = function(x) DefaultPlotHeight(x, 200))
#' plot(dacme, center = TRUE)
#'
#' @export
DefaultPlotHeight <- function(node, rootHeight = 100) {
  if (node$isRoot) return ( rootHeight )
  if (node$isLeaf) return ( 0 )
  # shrink the parent's plot height by a factor depending on this node's height
  h <- DefaultPlotHeight(node$parent, rootHeight) * (1 - 1 / node$height)
  return (h)
}


# Stores a `plotHeight` field on every node below (and including) `node`:
# leaves get height2 = 1, inner nodes max(children's height2) + 1 (computed
# bottom-up), and plotHeight is derived top-down from the parent's plotHeight.
SetHeight2 <- function(node, rootHeight = 100) {
  Set(node$leaves, height2 = 1)
  node$Do(function(x) x$height2 <- Aggregate(x, "height2", max) + 1, traversal = "post-order", filterFun = isNotLeaf)
  node$plotHeight <- rootHeight
  node$Do(function(x) x$plotHeight <- x$parent$plotHeight * (1 - 1 / x$height2), filterFun = isNotRoot)
}


#' Create a tree for demo and testing
#'
#' @param height the number of levels
#' @param branchingFactor the number of children per node
#' @param parent the parent node (for recursion)
#'
#' @return The \code{parent} node, with the generated subtree attached. For
#' \code{height <= 1} or \code{branchingFactor = 0} no children are added.
#'
#' @export
CreateRegularTree <- function(height = 5, branchingFactor = 3, parent = Node$new("1")) {
  # a tree of height 1 is just the root itself
  if (height <= 1) return (parent)
  # seq_len() so that branchingFactor = 0 creates no children
  for (i in seq_len(branchingFactor)) {
    child <- parent$AddChild(paste(parent$name, i, sep = "."), check = FALSE)
    CreateRegularTree(height - 1, branchingFactor, child)
  }
  return (parent)
}


#' Create a tree for demo and testing
#'
#' @param nodes The number of nodes to create
#' @param root the previous node (for recursion, typically use default value)
#' @param id The id (for recursion)
#'
#' @return The \code{root} node, with \code{nodes} nodes added below it.
#'
#' @export
CreateRandomTree <- function(nodes = 100, root = Node$new("1"), id = 1) {
  if (nodes <= 0) return (root)
  dpth <- root$height
  # pick one of the existing levels uniformly at random ...
  lvl <- sample.int(dpth, 1)
  # ... and attach the new node to a random node on that level
  t <- Traverse(root, filterFun = function(x) x$level == lvl)
  parent <- sample(t, 1)[[1]]
  parent$AddChild(as.character(id + 1), check = FALSE)
  CreateRandomTree(nodes - 1, root = root, id = id + 1)
  return (root)
}


# Returns a copy of `x` cropped for printing when it has more than `limit`
# nodes: nodes are numbered in traversal order and only those with
# .id < limit are kept. Every node that lost children gets a placeholder
# child "... <n> nodes w/ <m> sub" summarising what was dropped.
# If the tree is small enough, `x` is returned (cloned and detached from its
# parent when it is not a root).
PrintPruneSimple <- function(x, limit) {
  tc <- x$totalCount
  toBeCropped <- tc - limit
  if (toBeCropped < 1) {
    if(!isRoot(x)) {
      #clone s.t. x is root (for pretty level names)
      x <- Clone(x, attributes = TRUE)
      x$parent <- NULL
    }
    return (x)
  }

  x$Set(.id = 1:tc)
  # remember the pre-pruning counts on each node
  x$Do(function(x) {
    x$.originalTotalCount <- ifelse(x$isLeaf, 1, sum( sapply(x$children, function(x) x$.originalTotalCount)) + 1)
    x$.originalCount <- x$count
  }, traversal = "post-order"
  )
  xc <- Clone(x, pruneFun = function(x) x$.id < limit, attributes = TRUE)

  xc$Do(function(x) {
    if(x$count < x$.originalCount) {
      nds <- x$.originalCount - x$count
      sub <- x$.originalTotalCount - x$totalCount - nds
      x$AddChild(paste0("... ", nds, " nodes w/ ", sub, " sub"))
    }
  })
  # the value of this assignment (xc) is the function's (invisible) result
  x <- xc
}


# Same contract as PrintPruneSimple, but the nodes to keep are chosen by
# ordering all nodes by .level (ascending), then .height (descending), then
# by whether their position is > 2; the first `limit` nodes in that order
# are kept.
PrintPruneDist <- function(x, limit) {
  tc <- x$totalCount
  toBeCropped <- tc - limit
  if (toBeCropped < 1) {
    if(!isRoot(x)) {
      #clone s.t. x is root (for pretty level names)
      x <- Clone(x, attributes = TRUE)
      x$parent <- NULL
    }
    return (x)
  }

  # bottom-up pass: .height (taken from the first child) and subtree sizes
  t <- Traverse(x, traversal = "post-order")
  Do(t, function(x) {
    x$.height <- ifelse(x$isLeaf, 1, x$children[[1]]$.height + 1)
  })
  Do(t, function(x) {
    x$.originalTotalCount <- ifelse(x$isLeaf, 1, sum( sapply(x$children, function(x) x$.originalTotalCount)) + 1)
  })

  # top-down pass: ids, child counts and levels relative to x
  t <- Traverse(x)
  Set(t, .id = 1:tc)
  x$.level <- 1
  Do(t, function(x) {
    x$.originalCount <- length(x$children)
    x$.level <- ifelse(isRoot(x), 1, x$parent$.level + 1)
  })

  t <- t[order(Get(t, ".level"), - Get(t, ".height"), Get(t, function(x) x$position > 2))]
  keep <- c(rep(TRUE, limit), rep(FALSE, toBeCropped))
  Set(t, .keep = keep)

  #sapply(t, function(x) paste(x$.height, x$.level, x$name, sep = "."))

  xc <- Clone(x, pruneFun = function(x) x$.keep, attributes = TRUE)
  t <- Traverse(xc)
  Do(t, function(x) {
    if(x$count < x$.originalCount) {
      nds <- x$.originalCount - x$count
      sub <- x$.originalTotalCount - x$totalCount - nds
      x$AddChild(paste0("... ", nds, " nodes w/ ", sub, " sub"))
    }
  })
  # the value of this assignment (xc) is the function's (invisible) result
  x <- xc
}


#' @rdname ToDiagrammeRGraph
#' @export
GetDefaultTooltip <- function(node) {
  # fall back to the name when the node has no attributes
  if (length(node$attributes) > 0) {
    myattributes <- node$attributes
  } else {
    myattributes <- "name"
  }
  # one "- <attribute>: <value>" line per attribute; functions are not
  # evaluated but shown as the literal text "function"
  tt <- paste(sapply(myattributes, function(x) {
    v <- node[[x]]
    if (is.function(v)) v <- "function" else v <- GetAttribute(node, x)
    paste0("- ", x, ": ", v)
  }), collapse = "\n")
  return (tt)
}


#' Checks whether \code{name} is a reserved word, as defined in \code{NODE_RESERVED_NAMES_CONST}.
#'
#' @param name the name to check
#' @param check Either
#' \itemize{
#'  \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)}
#'  \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)}
#'  \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors}
#' }
#'
#' @return \code{name} if it is not reserved (or if checking is disabled),
#' otherwise \code{name} with \code{"2"} appended.
CheckNameReservedWord <- function(name, check = c("check", "no-warn", "no-check")) {
  check <- check[1]
  # checking disabled: hand the name back untouched
  if (check == FALSE) return (name)
  if (check == "no-check") return (name)
  if (name %in% NODE_RESERVED_NAMES_CONST) {
    name2 <- paste0(name, "2")
    if (check != "no-warn") {
      warning(paste0("Name '", name, "' is a reserved word as defined in NODE_RESERVED_NAMES_CONST. Using '", name2, "' instead."))
    }
    name <- name2
  }
  return (name)
}

================================================
FILE: R/zzz.R
================================================
# nocov start
.onLoad <- function(libname, pkgname) {
  if(getRversion() >= "3.6.0") {
    # register S3-methods from Suggested packages
    s3_register("igraph::as.igraph", "Node")
    s3_register("ape::as.phylo", "Node")
  }
  invisible()
}
# nocov end

================================================
FILE: README.md
================================================
CRAN: [![CRAN Version](http://www.r-pkg.org/badges/version/data.tree)](https://cran.r-project.org/package=data.tree/) [![CRAN downloads](http://cranlogs.r-pkg.org/badges/data.tree)](https://cran.r-project.org/package=data.tree/)

[![R-CMD-check](https://github.com/gluc/data.tree/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/gluc/data.tree/actions/workflows/R-CMD-check.yaml)

# data.tree

An R package to manage hierarchical data and tree structures

Hierarchical data is ubiquitous in statistics and programming (XML, search trees, family trees, classification, file system, etc.). However, no general-use *tree data structure* is available in base R.
Where tabular data has data.frame, hierarchical data is often modeled in lists of lists or similar makeshifts. These structures are often difficult to manage.
This is where the data.tree package steps in. It lets you build trees of hierarchical data for various uses: to print, plot and visualize, to generate breakdowns, to integrate with html widgets, to rapid prototype search algorithms, to test out new classification ideas, and much more.

Tree structures can be created programmatically, or by conversion. The package provides functionality to convert from and to various formats such as data.frames, list of lists, dendrograms, partykit, ape phylo, igraph, JSON, YAML, and more.
# Learn More To get started, you might want to read the [introduction vignette](https://CRAN.R-project.org/package=data.tree/vignettes/data.tree.html). There is also a vignette containing some [examples and applications](https://CRAN.R-project.org/package=data.tree/vignettes/applications.html). The manual is [here](https://CRAN.R-project.org/package=data.tree/data.tree.pdf) Finally, you'll find more examples and background information on my [blog](http://ipub.com/data-tree). # NOTE: The latest from github dev branch may have some breaking changes compared to CRAN. See [NEWS](https://github.com/gluc/data.tree/blob/dev/NEWS) for details. # Conventions: Coding Conventions: Google Style Guide, see https://google.github.io/styleguide/Rguide.xml Versioning Conventions: SemanticVersioning. See http://semver.org/ for details Branching Conventions: GitFlow. See https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow Pull Requests: Very welcome! Please branch from the dev branch. 
================================================ FILE: appveyor.yml ================================================ # DO NOT CHANGE the "init" and "install" sections below # Download script file from GitHub init: ps: | $ErrorActionPreference = "Stop" Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" Import-Module '..\appveyor-tool.ps1' install: ps: Bootstrap # Adapt as necessary starting from here #in case of problems, you might want to comment this out to clean the cache #See here: https://www.appveyor.com/docs/build-cache/ cache: #- C:\RLibrary environment: global: WARNINGS_ARE_ERRORS: 1 matrix: - R_VERSION: devel GCC_PATH: mingw_32 - R_VERSION: release R_ARCH: x64 - R_VERSION: stable - R_VERSION: patched build_script: - travis-tool.sh install_deps test_script: - travis-tool.sh run_tests on_failure: - 7z a failure.zip *.Rcheck\* - appveyor PushArtifact failure.zip artifacts: - path: '*.Rcheck\**\*.log' name: Logs - path: '*.Rcheck\**\*.out' name: Logs - path: '*.Rcheck\**\*.fail' name: Logs - path: '*.Rcheck\**\*.Rout' name: Logs - path: '\*_*.tar.gz' name: Bits - path: '\*_*.zip' name: Bits ================================================ FILE: cran-comments.md ================================================ ## General Comments This release was done as requested by Kurt Hornik, because of a problem with roxygen2. I now used "_PACKAGE" and it should solve the problem. Also I added a few features and fixed a few bugs. Finally, I deprecated two functions, as announced earlier ($fields and $fieldsAll). Best Regards, Christoph ## Test environments * github -> (macos-latest release, windows-latest release, ubuntu-latest devel, ubuntu-latest release, ubuntu-latest oldrel-1) -> OK * rhub::check_for_cran -> NOTES ## R CMD check results There were no ERRORs or WARNINGs. I'm getting funny NOTES on rhub::check_for_cran: ``` * checking CRAN incoming feasibility ... 
[11s] NOTE Maintainer: 'Christoph Glur ' New maintainer: Christoph Glur Old maintainer(s): Christoph Glur * checking Rd files ... NOTE checkRd: (-1) s3_register.Rd:46-48: Lost braces 46 | if (getRversion() >= "3.6.0") { | ^ * checking for non-standard things in the check directory ... NOTE Found the following files/directories: ''NULL'' * checking for detritus in the temp directory ... NOTE Found the following files/directories: 'lastMiKTeXException' ``` I couldn't reproduce this anywhere else, and I'm not sure if this is an issue with my code or with the check environment. Let me know if I was careless and there is anything for me to fix. ## revdepcheck results We checked 53 reverse dependencies (46 from CRAN + 7 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. * We saw 3 new problems * We failed to check 0 packages Issues with CRAN packages are summarised below. ### New problems (This reports the first line of each new failure) * collapsibleTree checking examples ... WARNING * directotree checking examples ... WARNING * forestry checking examples ... 
WARNING ✔ behaviorchange 0.5.5 ── E: 0 | W: 0 | N: 0 ✖ collapsibleTree 0.1.7 ── E: 0 | W: 0 +1 | N: 2 ✔ covid19dbcand 0.1.1 ── E: 0 | W: 0 | N: 0 ✔ Cluster.OBeu 1.2.3 ── E: 0 | W: 0 | N: 0 ✔ CondCopulas 0.1.3 ── E: 0 | W: 0 | N: 0 ✖ directotree 1.0.0 ── E: 0 | W: 0 +1 | N: 1 ✔ CovRegRF 1.0.4 ── E: 0 | W: 0 | N: 0 ✔ echarty 1.6.2 ── E: 0 | W: 0 | N: 0 ✔ changepoints 1.1.0 ── E: 0 | W: 0 | N: 0 ✔ filterNHP 0.1.2 ── E: 0 | W: 0 | N: 1 ✖ forestry 0.1.0 ── E: 0 | W: 0 +1 | N: 0 ✔ echarts4r 0.4.5 ── E: 0 | W: 0 | N: 0 ✔ GE 0.4.0 ── E: 0 | W: 0 | N: 0 ✔ gimme 0.7.15 ── E: 0 | W: 0 | N: 0 ✔ galah 1.5.4 ── E: 0 | W: 0 | N: 0 ✔ icesTAF 4.2.0 ── E: 0 | W: 0 | N: 0 ✔ htetree 0.1.17 ── E: 0 | W: 0 | N: 0 ✔ justifier 0.2.6 ── E: 0 | W: 0 | N: 0 ✔ LinTInd 1.6.0 ── E: 1 | W: 0 | N: 2 ✔ cola 2.8.0 ── E: 1 | W: 0 | N: 1 ✔ nmarank 0.3.0 ── E: 0 | W: 0 | N: 0 ✔ LACE 2.6.0 ── E: 0 | W: 0 | N: 1 ✔ momentuHMM 1.5.5 ── E: 1 | W: 0 | N: 0 ✔ pmxTools 1.3 ── E: 0 | W: 1 | N: 0 ✔ nonlinearICP 0.1.2.1 ── E: 0 | W: 0 | N: 0 ✔ radiant.model 1.6.3 ── E: 0 | W: 0 | N: 0 ✔ Pi 2.14.0 ── E: 0 | W: 0 | N: 1 ✔ ranktreeEnsemble 0.22 ── E: 0 | W: 0 | N: 0 ✔ randomForestSRC 3.2.2 ── E: 0 | W: 0 | N: 0 ✔ RFpredInterval 1.0.7 ── E: 0 | W: 0 | N: 0 ✔ Rgff 0.1.6 ── E: 0 | W: 0 | N: 1 ✔ SACCR 3.2 ── E: 0 | W: 0 | N: 0 ✔ shinyTree 0.3.1 ── E: 0 | W: 0 | N: 0 ✔ ClassifyR 3.6.2 ── E: 1 | W: 0 | N: 3 ✔ rock 0.6.7 ── E: 0 | W: 0 | N: 0 ✔ scicomptools 1.0.0 ── E: 0 | W: 0 | N: 0 ✔ SoilTaxonomy 0.2.3 ── E: 0 | W: 0 | N: 0 ✔ rocTree 1.1.1 ── E: 0 | W: 0 | N: 1 ✔ styler 1.10.2 ── E: 0 | W: 0 | N: 0 ✔ supportR 1.2.0 ── E: 0 | W: 0 | N: 0 ✔ tidygraph 1.2.3 ── E: 0 | W: 0 | N: 0 ✔ starvz 0.7.1 ── E: 0 | W: 0 | N: 0 ✔ triversity 1.0 ── E: 0 | W: 0 | N: 0 ✔ TT 0.98 ── E: 0 | W: 0 | N: 0 ✔ VERSO 1.12.0 ── E: 0 | W: 0 | N: 0 ✔ voronoiTreemap 0.2.0 ── E: 0 | W: 0 | N: 0 ✔ TKCat 1.0.7 ── E: 0 | W: 0 | N: 0 ✔ webchem 1.3.0 ── E: 0 | W: 0 | N: 0 ✔ yum 0.1.0 ── E: 0 | W: 0 | N: 0 ✔ wrTopDownFrag 1.0.2 ── E: 0 | W: 0 | N: 1 ✔ 
scAnnotatR 1.8.0 ── E: 0 | W: 0 | N: 0
✔ wrMisc 1.13.0 ── E: 0 | W: 0 | N: 0
✔ UniprotR 2.3.0 ── E: 0 | W: 0 | N: 0

### Problems Description

All three problems are a result of the deprecated functions. I notified the maintainers of the packages.

================================================
FILE: data.tree.Rproj
================================================
Version: 1.0

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: No

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: knitr
LaTeX: pdfLaTeX

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageBuildBinaryArgs: --resave-data
PackageCheckArgs: --as-cran
PackageRoxygenize: rd,collate,namespace

================================================
FILE: data_gen/acme.R
================================================
#Run this to generate data(acme)
#library(data.tree)

# Build the three-level demo tree: company -> departments -> projects.
# Children are attached in the order of the AddChild calls below.
acme <- Node$new("Acme Inc.")
accounting <- acme$AddChild("Accounting")
software <- accounting$AddChild("New Software")
standards <- accounting$AddChild("New Accounting Standards")
research <- acme$AddChild("Research")
newProductLine <- research$AddChild("New Product Line")
newLabs <- research$AddChild("New Labs")
it <- acme$AddChild("IT")
outsource <- it$AddChild("Outsource")
agile <- it$AddChild("Go agile")
goToR <- it$AddChild("Switch to R")

# `cost` is set on the leaf (project) nodes only
software$cost <- 1000000
standards$cost <- 500000
newProductLine$cost <- 2000000
newLabs$cost <- 750000
outsource$cost <- 400000
agile$cost <- 250000
goToR$cost <- 50000

# `p` is set on the leaf (project) nodes only
software$p <- 0.5
standards$p <- 0.75
newProductLine$p <- 0.25
newLabs$p <- 0.9
outsource$p <- 0.2
agile$p <- 0.05
goToR$p <- 1

# write the data set shipped as data(acme); path is relative to the package root
save(acme, file = "data/acme.rda", compress = "xz")

================================================
FILE: data_gen/mushroom.R
================================================
# Generates the data(mushroom) data set: five observations, one value per
# observation in each of the vectors below.
color <- c('red', 'brown', 'brown', 'green', 'red')
size <- c('small', 'small', 'large', 'small', 'large')
points <- c('yes', 'no', 'yes', 'no', 'no')
edible <- c('toxic', 'edible', 'edible', 'edible', 'edible')
# note: the `edible` vector is stored under the column name `edibility`
mushroom <- data.frame(color = color, size = size, points = points, edibility = edible)
save(mushroom, file = "data/mushroom.rda", compress = "xz")

================================================
FILE: getting-started-with-development.md
================================================
# Dev Guide

To develop on a new environment, you need to:

1. pull from CRAN (dev branch)
2. install R
3. install RStudio
4. install RTools (https://cran.rstudio.com/bin/windows/Rtools/)
4. install tinytex
   1. install.packages('tinytex')
   2. tinytex::install_tinytex()
   3. tinytex:::install_yihui_pkgs()
5. re-start RStudio
6. install devtools by running `install.packages("devtools")`

================================================
FILE: inst/extdata/flare.json
================================================
{ "name": "flare", "children": [ { "name": "analytics", "children": [ { "name": "cluster", "children": [ {"name": "AgglomerativeCluster", "size": 3938}, {"name": "CommunityStructure", "size": 3812}, {"name": "HierarchicalCluster", "size": 6714}, {"name": "MergeEdge", "size": 743} ] }, { "name": "graph", "children": [ {"name": "BetweennessCentrality", "size": 3534}, {"name": "LinkDistance", "size": 5731}, {"name": "MaxFlowMinCut", "size": 7840}, {"name": "ShortestPaths", "size": 5914}, {"name": "SpanningTree", "size": 3416} ] }, { "name": "optimization", "children": [ {"name": "AspectRatioBanker", "size": 7074} ] } ] }, { "name": "animate", "children": [ {"name": "Easing", "size": 17010}, {"name": "FunctionSequence", "size": 5842}, { "name": "interpolate", "children": [ {"name": "ArrayInterpolator", "size": 1983}, {"name": "ColorInterpolator", "size": 2047}, {"name": "DateInterpolator", "size": 1375}, {"name": "Interpolator", "size": 8746}, {"name": "MatrixInterpolator", "size": 2202}, {"name": "NumberInterpolator", "size": 1382}, {"name": "ObjectInterpolator", "size": 1629}, {"name":
"PointInterpolator", "size": 1675}, {"name": "RectangleInterpolator", "size": 2042} ] }, {"name": "ISchedulable", "size": 1041}, {"name": "Parallel", "size": 5176}, {"name": "Pause", "size": 449}, {"name": "Scheduler", "size": 5593}, {"name": "Sequence", "size": 5534}, {"name": "Transition", "size": 9201}, {"name": "Transitioner", "size": 19975}, {"name": "TransitionEvent", "size": 1116}, {"name": "Tween", "size": 6006} ] }, { "name": "data", "children": [ { "name": "converters", "children": [ {"name": "Converters", "size": 721}, {"name": "DelimitedTextConverter", "size": 4294}, {"name": "GraphMLConverter", "size": 9800}, {"name": "IDataConverter", "size": 1314}, {"name": "JSONConverter", "size": 2220} ] }, {"name": "DataField", "size": 1759}, {"name": "DataSchema", "size": 2165}, {"name": "DataSet", "size": 586}, {"name": "DataSource", "size": 3331}, {"name": "DataTable", "size": 772}, {"name": "DataUtil", "size": 3322} ] }, { "name": "display", "children": [ {"name": "DirtySprite", "size": 8833}, {"name": "LineSprite", "size": 1732}, {"name": "RectSprite", "size": 3623}, {"name": "TextSprite", "size": 10066} ] }, { "name": "flex", "children": [ {"name": "FlareVis", "size": 4116} ] }, { "name": "physics", "children": [ {"name": "DragForce", "size": 1082}, {"name": "GravityForce", "size": 1336}, {"name": "IForce", "size": 319}, {"name": "NBodyForce", "size": 10498}, {"name": "Particle", "size": 2822}, {"name": "Simulation", "size": 9983}, {"name": "Spring", "size": 2213}, {"name": "SpringForce", "size": 1681} ] }, { "name": "query", "children": [ {"name": "AggregateExpression", "size": 1616}, {"name": "And", "size": 1027}, {"name": "Arithmetic", "size": 3891}, {"name": "Average", "size": 891}, {"name": "BinaryExpression", "size": 2893}, {"name": "Comparison", "size": 5103}, {"name": "CompositeExpression", "size": 3677}, {"name": "Count", "size": 781}, {"name": "DateUtil", "size": 4141}, {"name": "Distinct", "size": 933}, {"name": "Expression", "size": 5130}, 
{"name": "ExpressionIterator", "size": 3617}, {"name": "Fn", "size": 3240}, {"name": "If", "size": 2732}, {"name": "IsA", "size": 2039}, {"name": "Literal", "size": 1214}, {"name": "Match", "size": 3748}, {"name": "Maximum", "size": 843}, { "name": "methods", "children": [ {"name": "add", "size": 593}, {"name": "and", "size": 330}, {"name": "average", "size": 287}, {"name": "count", "size": 277}, {"name": "distinct", "size": 292}, {"name": "div", "size": 595}, {"name": "eq", "size": 594}, {"name": "fn", "size": 460}, {"name": "gt", "size": 603}, {"name": "gte", "size": 625}, {"name": "iff", "size": 748}, {"name": "isa", "size": 461}, {"name": "lt", "size": 597}, {"name": "lte", "size": 619}, {"name": "max", "size": 283}, {"name": "min", "size": 283}, {"name": "mod", "size": 591}, {"name": "mul", "size": 603}, {"name": "neq", "size": 599}, {"name": "not", "size": 386}, {"name": "or", "size": 323}, {"name": "orderby", "size": 307}, {"name": "range", "size": 772}, {"name": "select", "size": 296}, {"name": "stddev", "size": 363}, {"name": "sub", "size": 600}, {"name": "sum", "size": 280}, {"name": "update", "size": 307}, {"name": "variance", "size": 335}, {"name": "where", "size": 299}, {"name": "xor", "size": 354}, {"name": "_", "size": 264} ] }, {"name": "Minimum", "size": 843}, {"name": "Not", "size": 1554}, {"name": "Or", "size": 970}, {"name": "Query", "size": 13896}, {"name": "Range", "size": 1594}, {"name": "StringUtil", "size": 4130}, {"name": "Sum", "size": 791}, {"name": "Variable", "size": 1124}, {"name": "Variance", "size": 1876}, {"name": "Xor", "size": 1101} ] }, { "name": "scale", "children": [ {"name": "IScaleMap", "size": 2105}, {"name": "LinearScale", "size": 1316}, {"name": "LogScale", "size": 3151}, {"name": "OrdinalScale", "size": 3770}, {"name": "QuantileScale", "size": 2435}, {"name": "QuantitativeScale", "size": 4839}, {"name": "RootScale", "size": 1756}, {"name": "Scale", "size": 4268}, {"name": "ScaleType", "size": 1821}, {"name": "TimeScale", 
"size": 5833} ] }, { "name": "util", "children": [ {"name": "Arrays", "size": 8258}, {"name": "Colors", "size": 10001}, {"name": "Dates", "size": 8217}, {"name": "Displays", "size": 12555}, {"name": "Filter", "size": 2324}, {"name": "Geometry", "size": 10993}, { "name": "heap", "children": [ {"name": "FibonacciHeap", "size": 9354}, {"name": "HeapNode", "size": 1233} ] }, {"name": "IEvaluable", "size": 335}, {"name": "IPredicate", "size": 383}, {"name": "IValueProxy", "size": 874}, { "name": "math", "children": [ {"name": "DenseMatrix", "size": 3165}, {"name": "IMatrix", "size": 2815}, {"name": "SparseMatrix", "size": 3366} ] }, {"name": "Maths", "size": 17705}, {"name": "Orientation", "size": 1486}, { "name": "palette", "children": [ {"name": "ColorPalette", "size": 6367}, {"name": "Palette", "size": 1229}, {"name": "ShapePalette", "size": 2059}, {"name": "SizePalette", "size": 2291} ] }, {"name": "Property", "size": 5559}, {"name": "Shapes", "size": 19118}, {"name": "Sort", "size": 6887}, {"name": "Stats", "size": 6557}, {"name": "Strings", "size": 22026} ] }, { "name": "vis", "children": [ { "name": "axis", "children": [ {"name": "Axes", "size": 1302}, {"name": "Axis", "size": 24593}, {"name": "AxisGridLine", "size": 652}, {"name": "AxisLabel", "size": 636}, {"name": "CartesianAxes", "size": 6703} ] }, { "name": "controls", "children": [ {"name": "AnchorControl", "size": 2138}, {"name": "ClickControl", "size": 3824}, {"name": "Control", "size": 1353}, {"name": "ControlList", "size": 4665}, {"name": "DragControl", "size": 2649}, {"name": "ExpandControl", "size": 2832}, {"name": "HoverControl", "size": 4896}, {"name": "IControl", "size": 763}, {"name": "PanZoomControl", "size": 5222}, {"name": "SelectionControl", "size": 7862}, {"name": "TooltipControl", "size": 8435} ] }, { "name": "data", "children": [ {"name": "Data", "size": 20544}, {"name": "DataList", "size": 19788}, {"name": "DataSprite", "size": 10349}, {"name": "EdgeSprite", "size": 3301}, {"name": 
"NodeSprite", "size": 19382}, { "name": "render", "children": [ {"name": "ArrowType", "size": 698}, {"name": "EdgeRenderer", "size": 5569}, {"name": "IRenderer", "size": 353}, {"name": "ShapeRenderer", "size": 2247} ] }, {"name": "ScaleBinding", "size": 11275}, {"name": "Tree", "size": 7147}, {"name": "TreeBuilder", "size": 9930} ] }, { "name": "events", "children": [ {"name": "DataEvent", "size": 2313}, {"name": "SelectionEvent", "size": 1880}, {"name": "TooltipEvent", "size": 1701}, {"name": "VisualizationEvent", "size": 1117} ] }, { "name": "legend", "children": [ {"name": "Legend", "size": 20859}, {"name": "LegendItem", "size": 4614}, {"name": "LegendRange", "size": 10530} ] }, { "name": "operator", "children": [ { "name": "distortion", "children": [ {"name": "BifocalDistortion", "size": 4461}, {"name": "Distortion", "size": 6314}, {"name": "FisheyeDistortion", "size": 3444} ] }, { "name": "encoder", "children": [ {"name": "ColorEncoder", "size": 3179}, {"name": "Encoder", "size": 4060}, {"name": "PropertyEncoder", "size": 4138}, {"name": "ShapeEncoder", "size": 1690}, {"name": "SizeEncoder", "size": 1830} ] }, { "name": "filter", "children": [ {"name": "FisheyeTreeFilter", "size": 5219}, {"name": "GraphDistanceFilter", "size": 3165}, {"name": "VisibilityFilter", "size": 3509} ] }, {"name": "IOperator", "size": 1286}, { "name": "label", "children": [ {"name": "Labeler", "size": 9956}, {"name": "RadialLabeler", "size": 3899}, {"name": "StackedAreaLabeler", "size": 3202} ] }, { "name": "layout", "children": [ {"name": "AxisLayout", "size": 6725}, {"name": "BundledEdgeRouter", "size": 3727}, {"name": "CircleLayout", "size": 9317}, {"name": "CirclePackingLayout", "size": 12003}, {"name": "DendrogramLayout", "size": 4853}, {"name": "ForceDirectedLayout", "size": 8411}, {"name": "IcicleTreeLayout", "size": 4864}, {"name": "IndentedTreeLayout", "size": 3174}, {"name": "Layout", "size": 7881}, {"name": "NodeLinkTreeLayout", "size": 12870}, {"name": "PieLayout", "size": 
2728}, {"name": "RadialTreeLayout", "size": 12348}, {"name": "RandomLayout", "size": 870}, {"name": "StackedAreaLayout", "size": 9121}, {"name": "TreeMapLayout", "size": 9191} ] }, {"name": "Operator", "size": 2490}, {"name": "OperatorList", "size": 5248}, {"name": "OperatorSequence", "size": 4190}, {"name": "OperatorSwitch", "size": 2581}, {"name": "SortOperator", "size": 2023} ] }, {"name": "Visualization", "size": 16540} ] } ] } ================================================ FILE: inst/extdata/jennylind.yaml ================================================ name: Jenny Lind type: decision Sign with Movie Company: type: chance Small Box Office: type: terminal p: 0.3 payoff: 200000 Medium Box Office: type: terminal p: 0.6 payoff: 1000000 Large Box Office: type: terminal p: 0.1 payoff: 3000000 Sign with TV Network: type: chance Small Box Office: type: terminal p: 0.3 payoff: 900000 Medium Box Office: type: terminal p: 0.6 payoff: 900000 Large Box Office: type: terminal p: 0.1 payoff: 900000 ================================================ FILE: inst/extdata/portfolio.csv ================================================ ISIN,Name,Ccy,Type,Duration,Weight,AssetCategory,AssetClass,SubAssetClass LI0015327682,LGT Money Market Fund (CHF) - B,CHF,Fund,,0.0300,Cash,CHF, LI0214880598,CS (Lie) Money Market Fund EUR EB,EUR,Fund,,0.0600,Cash,EUR, LI0214880689,CS (Lie) Money Market Fund USD EB,USD,Fund,,0.0200,Cash,USD, LU0243957825,Invesco Euro Corporate Bond A EUR Acc,EUR,Fund,5.1,0.1200,Fixed Income,EUR,Sov. and Corp. Bonds LU0408877412,JPM Euro Gov Sh. Duration Bd A (acc)-EUR,EUR,Fund,2.45,0.0650,Fixed Income,EUR,Sov. and Corp. Bonds LU0376989207,Aberdeen Global Sel Emerg Mkt Bd A2 HEUR,EUR,Fund,6.8,0.0300,Fixed Income,EUR,Em. 
Mkts Bonds GB00B42R2118,Threadneedle European High Yield Bond Ret Grs EUR,EUR,Fund,3.4,0.0450,Fixed Income,EUR,High Yield Bonds LU0292585030,AXA IM FIIS US Short Dur HY F-C USD,USD,Fund,1.6,0.0250,Fixed Income,USD,High Yield Bonds CH0011037469,Syngenta AG,CHF,Stock,,0.0100,Equities,Switzerland, DE0008490145,DWS Zurich Invest Aktien Schweiz,EUR,Fund,,0.0500,Equities,Switzerland, NL0000303600,ING Grope NV,EUR,Stock,,0.0100,Equities,Euroland, IE00B60SWX25,Source EURO STOXX 50 UCITS ETF - A,EUR,ETF,,0.0800,Equities,Euroland, FR0000120271,TOTAL,EUR,Stock,,0.0140,Equities,Euroland, DE0008404005,Allianz SE,EUR,Stock,,0.0130,Equities,Euroland, IT0000072618,Intesa Sanpaolo S.p.A.,EUR,Stock,,0.0100,Equities,Euroland, BE0003793107,Anheuser-Busch INBEV,EUR,Stock,,0.0180,Equities,Euroland, US4581401001,Intel Corp.,USD,Stock,,0.0100,Equities,US, US0378331005,Apple Corp,USD,Stock,,0.0300,Equities,US, US4370761029,Home Depot Inc.,USD,Stock,,0.0150,Equities,US, US5949181045,Microsoft Corp.,USD,Stock,,0.0140,Equities,US, US7427181091,Procter & Gamble Company,USD,Stock,,0.0120,Equities,US, GB00BH4HKS39,Vodafone Group PLC,GBP,Stock,,0.0090,Equities,UK, IE00B42Z5J44,iSHares MSCI Japan EUR Hedged,EUR,ETF,,0.0300,Equities,Japan, IE00B5377D42,iSHares MSCI Australia,USD,ETF,,0.0200,Equities,Australia, LU0040507039,Vontobel Emerging Markets Eq B,USD,Fund,,0.0550,Equities,Emerging Markets, LU0216734045,AXA WF Frm Europe Real Estate EUR,EUR,Fund,,0.0550,Alternative Investments,Real Estate,Eurozone LU0462954800,DB Platinum IV Systematic Alpha R1C-E,EUR,Fund,,0.1050,Alternative Investments,Hedge Funds, LU0239752628,UBS (LUX) Str. SICAV - Rogers Int. 
Com Idx (CHF),CHF,Fund,,0.0450,Alternative Investments,Commodities, ================================================ FILE: inst/extdata/useR15.csv ================================================ session,start,end,sessionName,room,seats,speaker,presentation Session 1,01.07.2015 10:30,01.07.2015 12:00,Kaleidoscope 1,Aalborghallen,790,Federico Marini,flowcatchR: A user-friendly workflow solution for the analysis of time-lapse cell flow imaging data Session 1,01.07.2015 10:30,01.07.2015 12:00,Kaleidoscope 1,Aalborghallen,790,Jonathan Clayden,Image processing and alignment with RNiftyReg and mmand Session 1,01.07.2015 10:30,01.07.2015 12:00,Kaleidoscope 1,Aalborghallen,790,Carel F. W. Peeters,rags2ridges: Ridge estimation and graphical modeling for high-dimensional precision matrices Session 1,01.07.2015 10:30,01.07.2015 12:00,Kaleidoscope 1,Aalborghallen,790,Henrik Tobias Madsen,dgRaph: Discrete factor graphs in R Session 1,01.07.2015 10:30,01.07.2015 12:00,Ecology,Gæstesalen,149,Costas Varsos,Optimized R functions for analysis of ecological community data using the R virtual laboratory (Rvlab) Session 1,01.07.2015 10:30,01.07.2015 12:00,Ecology,Gæstesalen,149,David L Miller,Building ecological models bit-by-bit Session 1,01.07.2015 10:30,01.07.2015 12:00,Ecology,Gæstesalen,149,Andrew Dolman,"Simulating ecological microcosms with systems of differential equations: tools for the scientific, technical and communication challenges" Session 1,01.07.2015 10:30,01.07.2015 12:00,Ecology,Gæstesalen,149,Marcel Austenfeld,"A Graphical User Interface for R in an Integrated Development Environment for Ecological Modeling, Scientific Image Analysis and Statistical Analysis" Session 1,01.07.2015 10:30,01.07.2015 12:00,Networks,Musiksalen,160,Gergely Daroczi,fbRads: Analyzing and managing Facebook ads from R Session 1,01.07.2015 10:30,01.07.2015 12:00,Networks,Musiksalen,160,Peter Meißner,Web scraping with R - A fast track overview. 
Session 1,01.07.2015 10:30,01.07.2015 12:00,Networks,Musiksalen,160,Antonio Rivero Ostoic,multiplex: Analysis of Multiple Social Networks with Algebra Session 1,01.07.2015 10:30,01.07.2015 12:00,Networks,Musiksalen,160,Gabor Csardi,What's new in igraph and networks Session 1,01.07.2015 10:30,01.07.2015 12:00,Reproducibility,Det lille Teater,224,Karthik Ram,rOpenSci: A suite of reproducible research tools in R Session 1,01.07.2015 10:30,01.07.2015 12:00,Reproducibility,Det lille Teater,224,Michael Lawrence,Enhancing reproducibility and collaboration via management of R package cohorts Session 1,01.07.2015 10:30,01.07.2015 12:00,Reproducibility,Det lille Teater,224,Joshua R. Polanin & Emily A. Hennessy,A Review of Meta-Analysis Packages in R Session 1,01.07.2015 10:30,01.07.2015 12:00,Reproducibility,Det lille Teater,224,David Smith,Simple reproducibility with the checkpoint package Session 1,01.07.2015 10:30,01.07.2015 12:00,Interfacing,Radiosalen,216,Kasper D. Hansen,Some lessons relevant to including external libraries in your R package Session 1,01.07.2015 10:30,01.07.2015 12:00,Interfacing,Radiosalen,216,Karl Millar,CXXR: Modernizing the R Interpreter Session 1,01.07.2015 10:30,01.07.2015 12:00,Interfacing,Radiosalen,216,Matt P. Dziubinski,Naturally Sweet Rcpp with Modern C++ and Boost Session 1,01.07.2015 10:30,01.07.2015 12:00,Interfacing,Radiosalen,216,Dan Putler,Linking R to the Spark MLlib Machine Learning Library Session 2,01.07.2015 13:30,01.07.2015 15:00,Kaleidoscope 2,Aalborghallen,790,Przemyslaw Biecek,"archivist: Tools for Storing, Restoring and Searching for R Objects" Session 2,01.07.2015 13:30,01.07.2015 15:00,Kaleidoscope 2,Aalborghallen,790,Joseph B. Rickert,R User Groups Session 2,01.07.2015 13:30,01.07.2015 15:00,Kaleidoscope 2,Aalborghallen,790,Richard M. 
Heiberger,Computational Precision and Floating-Point Arithmetic: A Teacher's Guide to Answering FAQ 7.31 Session 2,01.07.2015 13:30,01.07.2015 15:00,Kaleidoscope 2,Aalborghallen,790,Rasmus Bååth,"Tiny Data, Approximate Bayesian Computation and the Socks of Karl Broman" Session 2,01.07.2015 13:30,01.07.2015 15:00,Case study,Gæstesalen,149,Johannes Breidenbach,Using R for small area estimation in the Norwegian National Forest Inventory Session 2,01.07.2015 13:30,01.07.2015 15:00,Case study,Gæstesalen,149,Ivan Kasanický,Using R for natural gas market balancing in the Czech republic Session 2,01.07.2015 13:30,01.07.2015 15:00,Case study,Gæstesalen,149,Jakob W. Messner,Heteroscedastic censored and truncated regression for weather forecasting Session 2,01.07.2015 13:30,01.07.2015 15:00,Case study,Gæstesalen,149,Helle Sørensen,Multinomial functional regression with application to lameness detection for horses Session 2,01.07.2015 13:30,01.07.2015 15:00,Clustering,Musiksalen,160,Anders Ellern Bilgrau,Unsupervised Clustering and Meta-Analysis using Gaussian Mixture Copula Models Session 2,01.07.2015 13:30,01.07.2015 15:00,Clustering,Musiksalen,160,Claudia Beleites,Hierarchical Cluster Analysis of hyperspectral Raman images: a new point of view leads to 10000fold speedup Session 2,01.07.2015 13:30,01.07.2015 15:00,Clustering,Musiksalen,160,Silvia Liverani,Dirichlet process Bayesian clustering with the R package PReMiuM Session 2,01.07.2015 13:30,01.07.2015 15:00,Clustering,Musiksalen,160,Thomas Jagger,Examining the Environmental Characteristics of Tornado Outbreaks in the United States using Spatial Clustering Session 2,01.07.2015 13:30,01.07.2015 15:00,Data Management,Det lille Teater,224,Filip Schouwenaars,Taking testing to another level: testwhat Session 2,01.07.2015 13:30,01.07.2015 15:00,Data Management,Det lille Teater,224,Tony Fischetti,Failing fast and early: assertive/defensive programming for R data analysis pipelines Session 2,01.07.2015 13:30,01.07.2015 
15:00,Data Management,Det lille Teater,224,Hadley Wickham,Getting your data into R Session 2,01.07.2015 13:30,01.07.2015 15:00,Data Management,Det lille Teater,224,Christoph Glur,A better way to manage hierarchical data Session 2,01.07.2015 13:30,01.07.2015 15:00,Data Management,Det lille Teater,224,"Indrajit Roy, Michael Lawrence",A proposal for distributed data-structures in R Session 2,01.07.2015 13:30,01.07.2015 15:00,Computational Performance,Radiosalen,216,E. James Harner,Running R+Hadoop using Docker Containers Session 2,01.07.2015 13:30,01.07.2015 15:00,Computational Performance,Radiosalen,216,Matt P. Dziubinski,Algorithmic Differentiation for Extremum Estimation: An Introduction Using RcppEigen Session 2,01.07.2015 13:30,01.07.2015 15:00,Computational Performance,Radiosalen,216,Kirill Müller,Improving computational performance with algorithm engineering Session 2,01.07.2015 13:30,01.07.2015 15:00,Computational Performance,Radiosalen,216,Helena Kotthaus,Performance Analysis for Parallel R Programs: Towards Efficient Ressource Utilization Session 2,01.07.2015 13:30,01.07.2015 15:00,Computational Performance,Radiosalen,216,David Scott,Refactoring the xtable Package Session 3,01.07.2015 16:00,01.07.2015 17:30,Kaleidoscope 3,Aalborghallen,790,Friedrich Schuster,Coding for the enterprise server - what does it mean for you? Session 3,01.07.2015 16:00,01.07.2015 17:30,Kaleidoscope 3,Aalborghallen,790,Lukas Stadler,R as a citizen in a polyglot world - the promise of the Truffle framework Session 3,01.07.2015 16:00,01.07.2015 17:30,Kaleidoscope 3,Aalborghallen,790,Tobias Verbeke,Architect. 
An IDE for Data Science and R Session 3,01.07.2015 16:00,01.07.2015 17:30,Kaleidoscope 3,Aalborghallen,790,Balasubramanian Narasimhan,Distributed computing with R Session 3,01.07.2015 16:00,01.07.2015 17:30,Business,Gæstesalen,149,Peter Baker,Statistical consulting using R: a DRY approach from the Australian outback Session 3,01.07.2015 16:00,01.07.2015 17:30,Business,Gæstesalen,149,Stefan Milton Bache,Using R in Production Session 3,01.07.2015 16:00,01.07.2015 17:30,Business,Gæstesalen,149,Giuseppe Bruno,Hedging and Risk Management of CDOs portfolio with R Session 3,01.07.2015 16:00,01.07.2015 17:30,Business,Gæstesalen,149,Jim Porzak,Data Driven Customer Segmentation with R Session 3,01.07.2015 16:00,01.07.2015 17:30,Spatial,Musiksalen,160,Ian Cook,Bringing Geospatial Tasks into the Mainstream of Business Analytics Session 3,01.07.2015 16:00,01.07.2015 17:30,Spatial,Musiksalen,160,Jin Li,Novel hybrid spatial predictive methods of machine learning and geostatistics with applications to terrestrial and marine environments in Australia Session 3,01.07.2015 16:00,01.07.2015 17:30,Spatial,Musiksalen,160,Matthias Eckardt,Graphical Modelling of Multivariate Spatial Point Patterns Session 3,01.07.2015 16:00,01.07.2015 17:30,Spatial,Musiksalen,160,Virgilio Gomez-Rubio,Spatial Econometrics Models with R-INLA Session 3,01.07.2015 16:00,01.07.2015 17:30,Spatial,Det lille Teater,224,Sebastian Meyer,Spatio-Temporal Analysis of Epidemic Phenomena Using the R Package surveillance Session 3,01.07.2015 16:00,01.07.2015 17:30,Databases,Det lille Teater,224,Willem Ligtenberg,Rango - Databases made easy Session 3,01.07.2015 16:00,01.07.2015 17:30,Databases,Det lille Teater,224,Hannes Mühleisen,Ad-Hoc User-Defined Functions for MonetDB with R Session 3,01.07.2015 16:00,01.07.2015 17:30,Databases,Det lille Teater,224,Mateusz Zoltak,R database connectivity: what did we leave behind? 
Session 3,01.07.2015 16:00,01.07.2015 17:30,Databases,Radiosalen,216,Jeroen Ooms,jsonlite and mongolite Session 3,01.07.2015 16:00,01.07.2015 17:30,Databases,Radiosalen,216,Michael Wurst,Using R Efficiently with Large Databases Session 4,02.07.2015 10:30,02.07.2015 11:00,Kaleidoscope 4,Radiosalen,216,A. Jonathan R. Godfrey,While my base R gently weeps Session 4,02.07.2015 10:30,02.07.2015 11:00,Kaleidoscope 4,Radiosalen,216,Amitai Golub,Rapid Deployment of Automatic Scoring Models to Hadoop Production Systems Session 4,02.07.2015 10:30,02.07.2015 11:00,Kaleidoscope 4,Aalborghallen,790,Matt Dowle,"Fast, stable and scalable true radix sorting" Session 4,02.07.2015 10:30,02.07.2015 11:00,Kaleidoscope 4,Aalborghallen,790,Arunkumar Srinivasan,"Fast, flexible and memory efficient data manipulation using data.table" Session 4,02.07.2015 10:30,02.07.2015 11:00,Medicine,Gæstesalen,149,Marvin Steijaert,Phenotypic deconvolution: the next frontier in pharma Session 4,02.07.2015 10:30,02.07.2015 11:00,Medicine,Gæstesalen,149,Lara Lusa,medplot: A Web Application for Dynamic Summary and Analysis of Longitudinal Medical Data Based on R and shiny Session 4,02.07.2015 10:30,02.07.2015 11:00,Medicine,Gæstesalen,149,Paul Metcalfe,Using R and free software to improve the delivery of life changing medicine to patients Session 4,02.07.2015 10:30,02.07.2015 11:00,Medicine,Gæstesalen,149,Heidi Seibold,Stratified medicine using the partykit package Session 4,02.07.2015 10:30,02.07.2015 11:00,Regression,Musiksalen,160,Han Lin Shang,The ilc package Session 4,02.07.2015 10:30,02.07.2015 11:00,Regression,Musiksalen,160,Andrew Bray,Approximately Exact Calculations for Linear Mixed Models Session 4,02.07.2015 10:30,02.07.2015 11:00,Regression,Musiksalen,160,Alexandra Kuznetsova,Shiny application for analyzing consumer preference and sensory data in a mixed effects model framework: introducing SensMixed package Session 4,02.07.2015 10:30,02.07.2015 11:00,Regression,Musiksalen,160,Chenjerai Kathy 
Mutambanengwe,Spatial regression of quantiles based on parametric distributions Session 4,02.07.2015 10:30,02.07.2015 11:00,Regression,Det lille Teater,224,Helen Ogden,glmmsr: fitting GLMMs with sequential reduction Session 4,02.07.2015 10:30,02.07.2015 11:00,Commercial Offerings,Det lille Teater,224,Michael Sannella,Supporting the Rapi C-language API in an R-compatible engine Session 4,02.07.2015 10:30,02.07.2015 11:00,Commercial Offerings,Det lille Teater,224,Woo J. Jung,Enabling R for Big Data with PL/R and PivotalR: Real World Examples on Hadoop & MPP Databases Session 4,02.07.2015 10:30,02.07.2015 11:00,Commercial Offerings,Det lille Teater,224,Ron Pearson,The DataRobot R Package Session 4,02.07.2015 10:30,02.07.2015 11:00,Commercial Offerings,Det lille Teater,224,Lou Bajuk-Yorgan,Applying the R Language in Streaming Applications and Business Intelligence Session 4,02.07.2015 10:30,02.07.2015 11:00,Interactive graphics,Radiosalen,216,Monika Huhn,D3 and R Shiny - Making your graphs come to life Session 4,02.07.2015 10:30,02.07.2015 11:00,Interactive graphics,Radiosalen,216,Michael Sachs,Interactive Graphics with ggplot2 and gridSVG Session 4,02.07.2015 10:30,02.07.2015 11:00,Interactive graphics,Radiosalen,216,Joe Cheng,Interactive visualization using htmlwidgets and Shiny Session 4,02.07.2015 10:30,02.07.2015 11:00,Interactive graphics,Radiosalen,216,Adrian Waddell,Interactive Data Visualization using the Loon package Session 4,02.07.2015 10:30,02.07.2015 11:00,Interactive graphics,Radiosalen,216,Wayne Oldford,New interactive visualization tools for exploring high dimensional data in R Session 5,02.07.2015 13:00,02.07.2015 14:30,Kaleidoscope 5,Aalborghallen,790,Aimee Gott,Formalising R Development - ValidR Enterprise Session 5,02.07.2015 13:00,02.07.2015 14:30,Kaleidoscope 5,Aalborghallen,790,Christoph Best,Integrating R with the Go programming language using interprocess communication Session 5,02.07.2015 13:00,02.07.2015 14:30,Kaleidoscope 
5,Aalborghallen,790,Jennifer Bryan,Fun times with R and Google Sheets Session 5,02.07.2015 13:00,02.07.2015 14:30,Kaleidoscope 5,Aalborghallen,790,Jonathan Digby-North,A Comparative Study of Complex Estimation Software Session 5,02.07.2015 13:00,02.07.2015 14:30,Kaleidoscope 5,Aalborghallen,790,Oliver Keyes,Software Standards in the R Community: An Analysis Session 5,02.07.2015 13:00,02.07.2015 14:30,Teaching 1,Gæstesalen,149,Miranda Y Mortlock,SWOT analysis on using R for online training Session 5,02.07.2015 13:00,02.07.2015 14:30,Teaching 1,Gæstesalen,149,Eric Hare,Manipulation of Discrete Random Variables in R with discreteRV Session 5,02.07.2015 13:00,02.07.2015 14:30,Teaching 1,Gæstesalen,149,Matthias Gehrke,Teaching R in heterogeneous settings: Lessons learned Session 5,02.07.2015 13:00,02.07.2015 14:30,Teaching 1,Gæstesalen,149,Chris Wild,Interactive applications written in R to accelerate statistical learning Session 5,02.07.2015 13:00,02.07.2015 14:30,Teaching 1,Gæstesalen,149,James Curran,Classroom experiments Session 5,02.07.2015 13:00,02.07.2015 14:30,Statistical Methodology 1,Musiksalen,160,Thomas Kiefer,TAM: An R Package for Item Response Modelling Session 5,02.07.2015 13:00,02.07.2015 14:30,Statistical Methodology 1,Musiksalen,160,Genaro Sucarrat,gets: General-to-Specific (GETS) Modelling Session 5,02.07.2015 13:00,02.07.2015 14:30,Statistical Methodology 1,Musiksalen,160,Thouvenot Vincent,R Package CASA: Component Automatic Selection in Additive models Session 5,02.07.2015 13:00,02.07.2015 14:30,Statistical Methodology 1,Musiksalen,160,Christian Ritz,Dose-response analysis using R revisited Session 5,02.07.2015 13:00,02.07.2015 14:30,Statistical Methodology 1,Musiksalen,160,Kaylea Haynes,Changepoints over a Range of Penalties using the changepoint package Session 5,02.07.2015 13:00,02.07.2015 14:30,Machine Learning 1,Det lille Teater,224,"Neda Daneshgar, Majid Sarmad",Word Alignment tools in R Session 5,02.07.2015 13:00,02.07.2015 14:30,Machine 
Learning 1,Det lille Teater,224,Markus Loecher,Rapid detection of spatiotemporal clusters Session 5,02.07.2015 13:00,02.07.2015 14:30,Machine Learning 1,Det lille Teater,224,"Arash Fard, Vishrut Gupta",Scalable distributed random-forest in R Session 5,02.07.2015 13:00,02.07.2015 14:30,Machine Learning 1,Det lille Teater,224,Marie Chavent,Multivariate analysis of mixed data: The PCAmixdata R package Session 5,02.07.2015 13:00,02.07.2015 14:30,Machine Learning 1,Det lille Teater,224,Natalia da Silva,PPforest Session 5,02.07.2015 13:00,02.07.2015 14:30,Visualisation 1,Radiosalen,216,Katrin Grimm,Reordering and selecting continuous variables for scatterplot matrices Session 5,02.07.2015 13:00,02.07.2015 14:30,Visualisation 1,Radiosalen,216,Kirsten Van Hoorde,R-package to assess and visualize the calibration of multiclass risk predictions Session 5,02.07.2015 13:00,02.07.2015 14:30,Visualisation 1,Radiosalen,216,Martijn Tennekes,tmap: creating thematic maps in a flexible way Session 5,02.07.2015 13:00,02.07.2015 14:30,Visualisation 1,Radiosalen,216,Tal Galili,"The dendextend R package for manipulation of dendograms,visualization and comparison" Session 6,02.07.2015 16:00,02.07.2015 17:30,Kaleidoscope 6,Aalborghallen,790,Gabor Csardi,The METACRAN experiment Session 6,02.07.2015 16:00,02.07.2015 17:30,Kaleidoscope 6,Aalborghallen,790,Pedro J. 
Aphalo,Using R in photobiology Session 6,02.07.2015 16:00,02.07.2015 17:30,Kaleidoscope 6,Aalborghallen,790,Sven Jesper Knudsen,Industrial Big Data Analytics for Wind Turbines Session 6,02.07.2015 16:00,02.07.2015 17:30,Kaleidoscope 6,Aalborghallen,790,Andrie de Vries,The Network Structure of R Packages Session 6,02.07.2015 16:00,02.07.2015 17:30,Teaching 2,Gæstesalen,149,Gail Potter,Web Application Teaching Tools for Statistics Using Shiny and R Session 6,02.07.2015 16:00,02.07.2015 17:30,Teaching 2,Gæstesalen,149,an online,Teaching R in Session 6,02.07.2015 16:00,02.07.2015 17:30,Teaching 2,Gæstesalen,149,Jonathan Cornelissen,class Session 6,02.07.2015 16:00,02.07.2015 17:30,Teaching 2,Gæstesalen,149,Colin Rundel,Teaching R using the github ecosystem Session 6,02.07.2015 16:00,02.07.2015 17:30,Teaching 2,Musiksalen,160,Mine Cetinkaya-Rundel,"Using R, RStudio, and Docker for introductory statistics teaching" Session 6,02.07.2015 16:00,02.07.2015 17:30,Statistical Methodology 2,Musiksalen,160,Christoph Sax,seasonal: An X-13 interface for seasonal adjustment Session 6,02.07.2015 16:00,02.07.2015 17:30,Statistical Methodology 2,Musiksalen,160,Sören Möller,Estimating the Linfoot correlation in R Session 6,02.07.2015 16:00,02.07.2015 17:30,Statistical Methodology 2,Musiksalen,160,Alexander Kowarik,Seasonal Adjustment with the R packages x12 and x12GUI Session 6,02.07.2015 16:00,02.07.2015 17:30,Statistical Methodology 2,Det lille Teater,224,Il Do Ha,frailtyHL: R package for variable selection in general frailty models for various survival data Session 6,02.07.2015 16:00,02.07.2015 17:30,Machine Learning 2,Det lille Teater,224,Jan Wijffels,Massive Online Data Stream Mining using R and MOA Session 6,02.07.2015 16:00,02.07.2015 17:30,Machine Learning 2,Det lille Teater,224,Søren Havelund Welling,forestFloor: a package to visualize and comprehend the full curvature of random forests Session 6,02.07.2015 16:00,02.07.2015 17:30,Machine Learning 2,Det lille Teater,224,Douglas 
Mason,Machine Learning for Internal Product Measurement Session 6,02.07.2015 16:00,02.07.2015 17:30,Machine Learning 2,Det lille Teater,224,Erin LeDell,h2oEnsemble for Scalable Ensemble Learning in R Session 6,02.07.2015 16:00,02.07.2015 17:30,Visualisation 2,Radiosalen,216,Thomas Levine,Plotting data as music videos in R Session 6,02.07.2015 16:00,02.07.2015 17:30,Visualisation 2,Radiosalen,216,Eric Bonnet,NaviCell Web Service for Network-based Data Visualization Session 6,02.07.2015 16:00,02.07.2015 17:30,Visualisation 2,Radiosalen,216,Laure Cougnaud,Easy visualizations of high-dimensional genomic data Session 6,02.07.2015 16:00,02.07.2015 17:30,Visualisation 2,Radiosalen,216,Paul Murrell,The gridGraphics Package ================================================ FILE: man/Aggregate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{Aggregate} \alias{Aggregate} \title{Aggregate child values of a \code{Node}, recursively.} \usage{ Aggregate(node, attribute, aggFun, ...) } \arguments{ \item{node}{the \code{Node} on which to aggregate} \item{attribute}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. 
\code{acme$Get(function(node) node$cost * node$p)} }} \item{aggFun}{the aggregation function to be applied to the children's \code{attributes}} \item{...}{any arguments to be passed on to attribute (in case it's a function)} } \description{ The \code{Aggregate} method lets you fetch an attribute from a \code{Node}'s children, and then aggregate them using \code{aggFun}. For example, you can aggregate cost by summing costs of child \code{Nodes}. This is especially useful in the context of tree traversal, when using post-order traversal mode. } \details{ As with \code{\link{Get}}, the attribute can be a field, a method or a function. If the attribute on a child is \code{NULL}, \code{Aggregate} is called recursively on its children. } \examples{ data(acme) #Aggregate on a field Aggregate(acme, "cost", sum) #This is the same as: HomeRolledAggregate <- function(node) { sum(sapply(node$children, function(child) { if (!is.null(child$cost)) child$cost else HomeRolledAggregate(child) })) } HomeRolledAggregate(acme) #Aggregate using Get print(acme, "cost", minCost = acme$Get(Aggregate, "cost", min)) #use Aggregate with a function: Aggregate(acme, function(x) x$cost * x$p, sum) #cache values along the way acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum), traversal = "post-order") acme$IT$cost } \seealso{ \code{\link{Node}} } ================================================ FILE: man/AreNamesUnique.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{AreNamesUnique} \alias{AreNamesUnique} \title{Test whether all node names are unique.} \usage{ AreNamesUnique(node) } \arguments{ \item{node}{The root \code{Node} of the \code{data.tree} structure to test} } \value{ \code{TRUE} if every \code{Node$name} is unique among all nodes in the tree } \description{ This can be useful for some conversions.
} \examples{ data(acme) AreNamesUnique(acme) acme$name <- "IT" AreNamesUnique(acme) } \seealso{ as.igraph.Node } ================================================ FILE: man/CheckNameReservedWord.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{CheckNameReservedWord} \alias{CheckNameReservedWord} \title{Checks whether \code{name} is a reserved word, as defined in \code{NODE_RESERVED_NAMES_CONST}.} \usage{ CheckNameReservedWord(name, check = c("check", "no-warn", "no-check")) } \arguments{ \item{name}{the name to check} \item{check}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check"} or \code{FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} }} } \description{ Checks whether \code{name} is a reserved word, as defined in \code{NODE_RESERVED_NAMES_CONST}. } ================================================ FILE: man/Climb.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{Climb} \alias{Climb} \title{Climb a tree from parent to children, by provided criteria.} \usage{ #node$Climb(...) Climb(node, ...) } \arguments{ \item{node}{The root \code{\link{Node}} of the tree or subtree to climb} \item{...}{an attribute-value pairlist to be searched. For brevity, you can also provide a character vector to search for names.} } \value{ the \code{Node} having path \code{...}, or \code{NULL} if such a path does not exist } \description{ This method lets you climb the tree, from one branching point to the next.
On each \code{Node}, the \code{Climb} finds the first child having attribute value equal to the provided argument. } \examples{ data(acme) #the following are all equivalent Climb(acme, 'IT', 'Outsource') Climb(acme, name = 'IT', name = 'Outsource') Climb(acme, 'IT')$Climb('Outsource') Navigate(acme, path = "IT/Outsource") Climb(acme, name = 'IT') Climb(acme, position = c(2, 1)) #or, equivalent: Climb(acme, position = 2, position = 1) Climb(acme, name = "IT", cost = 250000) tree <- CreateRegularTree(5, 2) tree$Climb(c("1", "1"), position = c(2, 2))$path } \seealso{ \code{\link{Node}} \code{\link{Navigate}} } ================================================ FILE: man/Clone.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{Clone} \alias{Clone} \title{Clone a tree (creates a deep copy)} \usage{ Clone(node, pruneFun = NULL, attributes = FALSE) } \arguments{ \item{node}{the root node of the tree or sub-tree to clone} \item{pruneFun}{allows providing a prune criterion, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{attributes}{if FALSE, then R class attributes (e.g. formatters and grViz styles) are not cloned. This makes the method faster.} } \value{ the clone of the tree or sub-tree } \description{ The method also clones object attributes (such as the formatters), if desired. If the method is called on a non-root, then the parent relationship is not cloned, and the resulting \code{\link{Node}} will be a root.
} \examples{ data(acme) acmeClone <- Clone(acme) acmeClone$name <- "New Acme" # acmeClone does not point to the same reference object anymore: acme$name #cloning a subtree data(acme) itClone <- Clone(acme$IT) itClone$isRoot } \seealso{ SetFormat } ================================================ FILE: man/CreateRandomTree.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{CreateRandomTree} \alias{CreateRandomTree} \title{Create a tree for demo and testing} \usage{ CreateRandomTree(nodes = 100, root = Node$new("1"), id = 1) } \arguments{ \item{nodes}{The number of nodes to create} \item{root}{the previous node (for recursion, typically use default value)} \item{id}{The id (for recursion)} } \description{ Create a tree for demo and testing } ================================================ FILE: man/CreateRegularTree.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{CreateRegularTree} \alias{CreateRegularTree} \title{Create a tree for demo and testing} \usage{ CreateRegularTree(height = 5, branchingFactor = 3, parent = Node$new("1")) } \arguments{ \item{height}{the number of levels} \item{branchingFactor}{the number of children per node} \item{parent}{the parent node (for recursion)} } \description{ Create a tree for demo and testing } ================================================ FILE: man/Cumulate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{Cumulate} \alias{Cumulate} \title{Cumulate values among siblings} \usage{ Cumulate(node, attribute, aggFun, ...) } \arguments{ \item{node}{The node on which we want to cumulate} \item{attribute}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) 
the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} }} \item{aggFun}{the aggregation function to be applied to the children's \code{attributes}} \item{...}{any arguments to be passed on to attribute (in case it's a function)} } \description{ For example, you can sum up values of siblings before this \code{Node}. } \examples{ data(acme) acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum), traversal = "post-order") acme$Do(function(x) x$cumCost <- Cumulate(x, "cost", sum)) print(acme, "cost", "cumCost") } ================================================ FILE: man/DefaultPlotHeight.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{DefaultPlotHeight} \alias{DefaultPlotHeight} \title{Calculates the height of a \code{Node} given the height of the root.} \usage{ DefaultPlotHeight(node, rootHeight = 100) } \arguments{ \item{node}{The node} \item{rootHeight}{The height of the root} } \description{ This function puts leafs at the bottom (not hanging), and makes edges equally long. Useful for easy plotting with third-party packages, e.g. if you have no specific height attribute, e.g. 
with \code{\link{as.dendrogram.Node}}, \code{\link{ToNewick}}, and \code{\link{as.phylo.Node}} } \examples{ data(acme) dacme <- as.dendrogram(acme, heightAttribute = function(x) DefaultPlotHeight(x, 200)) plot(dacme, center = TRUE) } ================================================ FILE: man/Distance.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{Distance} \alias{Distance} \title{Find the distance between two nodes of the same tree} \usage{ Distance(node1, node2) } \arguments{ \item{node1}{the first node in the tree} \item{node2}{the second node in the same tree} } \description{ The distance is measured as the number of edges that need to be traversed to reach node2 when starting from node1. } \examples{ data(acme) Distance(FindNode(acme, "Outsource"), FindNode(acme, "Research")) } ================================================ FILE: man/Do.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_traversal.R \name{Do} \alias{Do} \title{Executes a function on a set of nodes} \usage{ # OO-style: # node$Do(fun, # ..., # traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), # pruneFun = NULL, # filterFun = NULL) # traditional: Do(nodes, fun, ...) } \arguments{ \item{nodes}{The nodes on which to execute \code{fun} (typically obtained via \code{\link{Traverse}})} \item{fun}{the function to execute.
The function is expected to be either a Method, or to take a Node as its first argument} \item{...}{any additional parameters to be passed on to fun} } \description{ Executes a function on a set of nodes } \examples{ data(acme) traversal <- Traverse(acme) Do(traversal, function(node) node$expectedCost <- node$p * node$cost) print(acme, "expectedCost") } \seealso{ \code{\link{Node}} \code{\link{Get}} \code{\link{Set}} \code{\link{Traverse}} } ================================================ FILE: man/FindNode.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{FindNode} \alias{FindNode} \title{Find a node by name in the (sub-)tree} \usage{ FindNode(node, name) } \arguments{ \item{node}{The root \code{Node} of the tree or sub-tree to search} \item{name}{The name of the \code{Node} to be returned} } \value{ The first \code{Node} whose name matches, or \code{NULL} if no such \code{Node} is found. } \description{ Scans the entire sub-tree spanned by \code{node} and returns the first \code{\link{Node}} having the \code{name} specified. This is mainly useful for trees whose name is unique. If \code{\link{AreNamesUnique}} is \code{FALSE}, i.e. if there is more than one \code{Node} called \code{name} in the tree, then it is undefined which one will be returned. Also note that this method is not particularly fast. See examples for a faster way to index large trees, if you need to do multiple searches. See \code{\link{Traverse}} if you need to find multiple \code{Nodes}. } \examples{ data(acme) FindNode(acme, "Outsource") #re-usable hashed index for multiple searches: if(!AreNamesUnique(acme)) stop("Hashed index works for unique names only!") trav <- Traverse(acme, "level") names(trav) <- Get(trav, "name") nameIndex <- as.environment(trav) #you could also use hash from package hash instead! 
#nameIndex <- hash(trav) nameIndex$Outsource nameIndex$IT } \seealso{ AreNamesUnique, Traverse } ================================================ FILE: man/FormatFixedDecimal.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{FormatFixedDecimal} \alias{FormatFixedDecimal} \title{Format a Number as a Decimal} \usage{ FormatFixedDecimal(x, digits = 3) } \arguments{ \item{x}{a numeric scalar or vector} \item{digits}{the number of digits to print after the decimal point} } \value{ A string corresponding to x, suitable for printing } \description{ Simple function that can be used as a format function when converting trees to a \code{data.frame} } \examples{ data(acme) print(acme, prob = acme$Get("p", format = function(x) FormatFixedDecimal(x, 4))) } ================================================ FILE: man/FormatPercent.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{FormatPercent} \alias{FormatPercent} \title{Format a Number as a Percentage} \usage{ FormatPercent(x, digits = 2, format = "f", ...) 
} \arguments{ \item{x}{A number} \item{digits}{The number of digits to print} \item{format}{The format to use} \item{...}{Any other argument passed to formatC} } \value{ A string corresponding to x, suitable for printing } \description{ This utility method can be used as a format function when converting trees to a \code{data.frame} } \examples{ data(acme) print(acme, prob = acme$Get("p", format = FormatPercent)) } \seealso{ formatC } ================================================ FILE: man/Get.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_traversal.R \name{Get} \alias{Get} \title{Traverse a Tree and Collect Values} \usage{ # OO-style: #node$Get(attribute, # ..., # traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), # pruneFun = NULL, # filterFun = NULL, # format = FALSE, # inheritFromAncestors = FALSE) # traditional: Get(nodes, attribute, ..., format = FALSE, inheritFromAncestors = FALSE, simplify = c(TRUE, FALSE, "array", "regular")) } \arguments{ \item{nodes}{The nodes on which to perform the Get (typically obtained via \code{\link{Traverse}})} \item{attribute}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} }} \item{...}{in case the \code{attribute} is a function or a method, the ellipsis is passed to it as additional arguments.} \item{format}{if \code{FALSE} (the default), no formatting is being used. 
If \code{TRUE}, then the first formatter (if any) found along the ancestor path is being used for formatting (see \code{\link{SetFormat}}). If \code{format} is a function, then the collected value is passed to that function, and the result is returned.} \item{inheritFromAncestors}{if \code{TRUE}, then the path above a \code{Node} is searched to get the \code{attribute} in case it is NULL.} \item{simplify}{same as \code{\link{sapply}}, i.e. TRUE, FALSE or "array". Additionally, you can specify "regular" if each returned value is of length > 1, and equally named. See below for an example.} } \value{ a vector containing the \code{attributes} collected during traversal, in traversal order. \code{NULL} is converted to NA, such that \code{length(Node$Get) == Node$totalCount} } \description{ The \code{Get} method is one of the most important ones of the \code{data.tree} package. It lets you traverse a tree and collect values along the way. Alternatively, you can call a method or a function on each \code{\link{Node}}.
} \examples{ data(acme) acme$Get("level") acme$Get("totalCount") acme$Get(function(node) node$cost * node$p, filterFun = isLeaf) #This is equivalent: nodes <- Traverse(acme, filterFun = isLeaf) Get(nodes, function(node) node$cost * node$p) #simplify = "regular" will preserve names acme$Get(function(x) c(position = x$position, level = x$level), simplify = "regular") } \seealso{ \code{\link{Node}} \code{\link{Set}} \code{\link{Do}} \code{\link{Traverse}} } ================================================ FILE: man/GetAttribute.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{GetAttribute} \alias{GetAttribute} \title{Get an attribute from a Node.} \usage{ GetAttribute( node, attribute, ..., format = FALSE, inheritFromAncestors = FALSE, nullAsNa = TRUE ) } \arguments{ \item{node}{The \code{\link{Node}} from which the \code{attribute} should be fetched.} \item{attribute}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} }} \item{...}{in case the \code{attribute} is a function or a method, the ellipsis is passed to it as additional arguments.} \item{format}{if \code{FALSE} (the default), no formatting is being used. If \code{TRUE}, then the first formatter (if any) found along the ancestor path is being used for formatting (see \code{\link{SetFormat}}). 
If \code{format} is a function, then the collected value is passed to that function, and the result is returned.} \item{inheritFromAncestors}{if \code{TRUE}, then the path above a \code{Node} is searched to get the \code{attribute} in case it is NULL.} \item{nullAsNa}{If TRUE (the default), then NULL is returned as NA. Otherwise it is returned as NULL.} } \description{ Get an attribute from a Node. } \examples{ data(acme) GetAttribute(acme$IT$Outsource, "cost") } ================================================ FILE: man/GetPhyloNr.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_ape.R \name{GetPhyloNr} \alias{GetPhyloNr} \title{Determine the number a \code{Node} has after conversion to a phylo object} \usage{ GetPhyloNr(x, type = c("node", "edge")) } \arguments{ \item{x}{The Node} \item{type}{Either "node" (the default) or "edge" (to get the number of the edge from \code{x} to its parent)} } \value{ an integer representing the node } \description{ Use this function when plotting a Node as a phylo, e.g. to set custom labels to plot. } \examples{ library(ape) library(data.tree) data(acme) ap <- as.phylo(acme) #plot(ap) #nodelabels("IT Dep.", GetPhyloNr(Climb(acme, "IT"))) #edgelabels("Good!", GetPhyloNr(Climb(acme, "IT", "Switch to R"), "edge")) } \seealso{ Other ape phylo conversions: \code{\link{as.Node.phylo}()}, \code{\link{as.phylo.Node}()} } \concept{ape phylo conversions} ================================================ FILE: man/NODE_RESERVED_NAMES_CONST.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node.R \docType{data} \name{NODE_RESERVED_NAMES_CONST} \alias{NODE_RESERVED_NAMES_CONST} \title{Names that are reserved by the Node class.} \format{ An object of class \code{character} of length 43. 
} \usage{ NODE_RESERVED_NAMES_CONST } \description{ These are reserved by the Node class, you cannot use these as attribute names. Note also that all attributes starting with a . are reserved. } \keyword{datasets} ================================================ FILE: man/Navigate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{Navigate} \alias{Navigate} \title{Navigate to another node by relative path.} \usage{ Navigate(node, path) } \arguments{ \item{node}{The starting \code{\link{Node}} to navigate} \item{path}{A string or a character vector describing the path to navigate} } \description{ Navigate to another node by relative path. } \details{ The \code{path} is always relative to the \code{node}. Navigation to the parent is defined by \code{..}, whereas navigation to a child is defined via the child's name. If path is provided as a string, then the navigation steps are separated by '/'. } \examples{ data(acme) Navigate(acme$Research, "../IT/Outsource") Navigate(acme$Research, c("..", "IT", "Outsource")) } \seealso{ \code{\link{Climb}} } ================================================ FILE: man/Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node.R \docType{class} \name{Node} \alias{Node} \title{Create a \code{data.tree} Structure With \code{Nodes}} \format{ An \code{\link{R6Class}} generator object } \usage{ # n1 <- Node$new("Node 1") } \description{ \code{Node} is at the very heart of the \code{data.tree} package. All trees are constructed by tying together \code{Node} objects. } \details{ Assemble \code{Node} objects into a \code{data.tree} structure and use the traversal methods to set, get, and perform operations on it. Typically, you construct larger tree structures by converting from \code{data.frame}, \code{list}, or other formats. Most methods (e.g. 
\code{node$Sort()}) also have a functional form (e.g. \code{Sort(node)}) } \examples{ library(data.tree) acme <- Node$new("Acme Inc.") accounting <- acme$AddChild("Accounting")$ AddSibling("Research")$ AddChild("New Labs")$ parent$ AddSibling("IT")$ AddChild("Outsource") print(acme) ## ------------------------------------------------ ## Method `Node$new` ## ------------------------------------------------ node <- Node$new("mynode", x = 2, y = "value of y") node$y ## ------------------------------------------------ ## Method `Node$AddChild` ## ------------------------------------------------ root <- Node$new("myroot", myname = "I'm the root") root$AddChild("child1", myname = "I'm the favorite child") child2 <- root$AddChild("child2", myname = "I'm just another child") child3 <- child2$AddChild("child3", myname = "Grandson of a root!") print(root, "myname") ## ------------------------------------------------ ## Method `Node$AddChildNode` ## ------------------------------------------------ root <- Node$new("myroot") child <- Node$new("mychild") root$AddChildNode(child) ## ------------------------------------------------ ## Method `Node$AddSibling` ## ------------------------------------------------ #' root <- Node$new("myroot") child <- root$AddChild("child1") sibling <- child$AddSibling("sibling1") ## ------------------------------------------------ ## Method `Node$AddSiblingNode` ## ------------------------------------------------ root <- Node$new("myroot") child <- Node$new("mychild") sibling <- Node$new("sibling") root$AddChildNode(child)$AddSiblingNode(sibling) ## ------------------------------------------------ ## Method `Node$RemoveChild` ## ------------------------------------------------ node <- Node$new("myroot")$AddChild("mychild")$root node$RemoveChild("mychild") ## ------------------------------------------------ ## Method `Node$RemoveAttribute` ## ------------------------------------------------ node <- Node$new("mynode") node$RemoveAttribute("age", 
stopIfNotAvailable = FALSE) node$age <- 27 node$RemoveAttribute("age") node ## ------------------------------------------------ ## Method `Node$Sort` ## ------------------------------------------------ data(acme) acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order") Sort(acme, "totalCost", decreasing = FALSE) print(acme, "totalCost") ## ------------------------------------------------ ## Method `Node$Prune` ## ------------------------------------------------ data(acme) acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum)) Prune(acme, function(x) x$cost > 700000) print(acme, "cost") ## ------------------------------------------------ ## Method `Node$Climb` ## ------------------------------------------------ data(acme) #the following are all equivalent Climb(acme, 'IT', 'Outsource') Climb(acme, name = 'IT', name = 'Outsource') Climb(acme, 'IT')$Climb('Outsource') Navigate(acme, path = "IT/Outsource") Climb(acme, name = 'IT') Climb(acme, position = c(2, 1)) #or, equivalent: Climb(acme, position = 2, position = 1) Climb(acme, name = "IT", cost = 250000) tree <- CreateRegularTree(5, 2) tree$Climb(c("1", "1"), position = c(2, 2))$path ## ------------------------------------------------ ## Method `Node$Navigate` ## ------------------------------------------------ data(acme) Navigate(acme$Research, "../IT/Outsource") Navigate(acme$Research, c("..", "IT", "Outsource")) ## ------------------------------------------------ ## Method `Node$Get` ## ------------------------------------------------ data(acme) acme$Get("level") acme$Get("totalCount") acme$Get(function(node) node$cost * node$p, filterFun = isLeaf) #This is equivalent: nodes <- Traverse(acme, filterFun = isLeaf) Get(nodes, function(node) node$cost * node$p) #simplify = "regular" will preserve names acme$Get(function(x) c(position = x$position, level = x$level), simplify = "regular") ## ------------------------------------------------ ## Method `Node$Do` ## 
------------------------------------------------ data(acme) acme$Do(function(node) node$expectedCost <- node$p * node$cost) print(acme, "expectedCost") ## ------------------------------------------------ ## Method `Node$Set` ## ------------------------------------------------ data(acme) acme$Set(departmentId = 1:acme$totalCount, openingHours = NULL, traversal = "post-order") acme$Set(head = c("Jack Brown", "Mona Moneyhead", "Dr. Frank N. Stein", "Eric Nerdahl" ), filterFun = function(x) !x$isLeaf ) print(acme, "departmentId", "head") } \seealso{ For more details see the \code{\link{data.tree}} documentations, or the \code{data.tree} vignette: \code{vignette("data.tree")} \code{\link{Node}} \code{\link{Sort}} } \section{Active bindings}{ \if{html}{\out{
}} \describe{ \item{\code{name}}{Gets or sets the name of a \code{Node}. For example \code{Node$name <- "Acme"}.} \item{\code{printFormatters}}{gets or sets the formatters used to print a \code{Node}. Set this as a list to a root node. The different formatters are h (horizontal), v (vertical), l (L), j (junction), and s (separator). For example, you can set the formatters to \code{list(h = "\u2500" , v = "\u2502", l = "\u2514", j = "\u251C", s = " ")} to get a similar behavior as in \code{fs::dir_tree()}. The defaults are: \code{list(h = "--" , v = "\u00A6", l = "\u00B0", j = "\u00A6", s = " ")}} \item{\code{parent}}{Gets or sets the parent \code{Node} of a \code{Node}. Only set this if you know what you are doing, as you might mess up the tree structure!} \item{\code{children}}{Gets or sets the children \code{list} of a \code{Node}. Only set this if you know what you are doing, as you might mess up the tree structure!} \item{\code{isLeaf}}{Returns \code{TRUE} if the \code{Node} is a leaf, \code{FALSE} otherwise} \item{\code{isRoot}}{Returns \code{TRUE} if the \code{Node} is the root, \code{FALSE} otherwise} \item{\code{count}}{Returns the number of children of a \code{Node}} \item{\code{totalCount}}{Returns the total number of \code{Node}s in the tree} \item{\code{path}}{Returns a vector of mode \code{character} containing the names of the \code{Node}s in the path from the root to this \code{Node}} \item{\code{pathString}}{Returns a string representing the path to this \code{Node}, with the node names separated by a forward slash (\code{/})} \item{\code{position}}{The position of a \code{Node} within its siblings} \item{\code{fields}}{Will be deprecated, use \code{attributes} instead} \item{\code{fieldsAll}}{Will be deprecated, use \code{attributesAll} instead} \item{\code{attributes}}{The attributes defined on this specific node} \item{\code{attributesAll}}{The distinct union of attributes defined on all the nodes in the tree spanned by this \code{Node}} \item{\code{levelName}}{Returns the name of
the \code{Node}, preceded by level times '*'. Useful for printing and not typically called by package users.} \item{\code{leaves}}{Returns a list containing all the leaf \code{Node}s} \item{\code{leafCount}}{Returns the number of leaves below a \code{Node}} \item{\code{level}}{Returns an integer representing the level of a \code{Node}. For example, the root has level 1.} \item{\code{height}}{Returns max(level) of any of the \code{Nodes} of the tree} \item{\code{isBinary}}{Returns \code{TRUE} if all \code{Node}s in the tree (except the leaves) have \code{count = 2}} \item{\code{root}}{Returns the root of a \code{Node} in a tree.} \item{\code{siblings}}{Returns a \code{list} containing all the siblings of this \code{Node}} \item{\code{averageBranchingFactor}}{Returns the average number of crotches below this \code{Node}} } \if{html}{\out{
}} } \section{Methods}{ \subsection{Public methods}{ \itemize{ \item \href{#method-Node-new}{\code{Node$new()}} \item \href{#method-Node-AddChild}{\code{Node$AddChild()}} \item \href{#method-Node-AddChildNode}{\code{Node$AddChildNode()}} \item \href{#method-Node-AddSibling}{\code{Node$AddSibling()}} \item \href{#method-Node-AddSiblingNode}{\code{Node$AddSiblingNode()}} \item \href{#method-Node-RemoveChild}{\code{Node$RemoveChild()}} \item \href{#method-Node-RemoveAttribute}{\code{Node$RemoveAttribute()}} \item \href{#method-Node-Sort}{\code{Node$Sort()}} \item \href{#method-Node-Revert}{\code{Node$Revert()}} \item \href{#method-Node-Prune}{\code{Node$Prune()}} \item \href{#method-Node-Climb}{\code{Node$Climb()}} \item \href{#method-Node-Navigate}{\code{Node$Navigate()}} \item \href{#method-Node-Get}{\code{Node$Get()}} \item \href{#method-Node-Do}{\code{Node$Do()}} \item \href{#method-Node-Set}{\code{Node$Set()}} \item \href{#method-Node-clone}{\code{Node$clone()}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-new}{}}} \subsection{Method \code{new()}}{ Create a new \code{Node} object. This is often used to create the root of a tree when creating a tree programmatically. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$new(name, check = c("check", "no-warn", "no-check"), ...)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{name}}{the name of the node to be created} \item{\code{check}}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} }} \item{\code{...}}{A name-value mapping of node attributes} } \if{html}{\out{
}} } \subsection{Returns}{ A new \code{Node} object } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{node <- Node$new("mynode", x = 2, y = "value of y") node$y } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-AddChild}{}}} \subsection{Method \code{AddChild()}}{ Creates a \code{Node} and adds it as the last child of the \code{Node} on which this is called. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$AddChild(name, check = c("check", "no-warn", "no-check"), ...)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{name}}{the name of the node to be created} \item{\code{check}}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} }} \item{\code{...}}{A name-value mapping of node attributes} } \if{html}{\out{
}} } \subsection{Returns}{ The new \code{Node} (invisibly) } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{root <- Node$new("myroot", myname = "I'm the root") root$AddChild("child1", myname = "I'm the favorite child") child2 <- root$AddChild("child2", myname = "I'm just another child") child3 <- child2$AddChild("child3", myname = "Grandson of a root!") print(root, "myname") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-AddChildNode}{}}} \subsection{Method \code{AddChildNode()}}{ Adds a \code{Node} as a child to this node. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$AddChildNode(child)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{child}}{The child \code{"Node"} to add.} } \if{html}{\out{
}} } \subsection{Returns}{ the child node added (this lets you chain calls) } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{root <- Node$new("myroot") child <- Node$new("mychild") root$AddChildNode(child) } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-AddSibling}{}}} \subsection{Method \code{AddSibling()}}{ Creates a new \code{Node} called \code{name} and adds it after this \code{Node} as a sibling. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$AddSibling(name, check = c("check", "no-warn", "no-check"), ...)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{name}}{the name of the node to be created} \item{\code{check}}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} }} \item{\code{...}}{A name-value mapping of node attributes} } \if{html}{\out{
}} } \subsection{Returns}{ the sibling node (this lets you chain calls) } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{root <- Node$new("myroot") child <- root$AddChild("child1") sibling <- child$AddSibling("sibling1") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-AddSiblingNode}{}}} \subsection{Method \code{AddSiblingNode()}}{ Adds a \code{Node} after this \code{Node}, as a sibling. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$AddSiblingNode(sibling)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{sibling}}{The \code{"Node"} to add as a sibling.} } \if{html}{\out{
}} } \subsection{Returns}{ the added sibling node (this lets you chain calls, as in the examples) } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{root <- Node$new("myroot") child <- Node$new("mychild") sibling <- Node$new("sibling") root$AddChildNode(child)$AddSiblingNode(sibling) } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-RemoveChild}{}}} \subsection{Method \code{RemoveChild()}}{ Removes the child \code{Node} called \code{name} from a \code{Node} and returns it. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$RemoveChild(name)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{name}}{the name of the child node to be removed} } \if{html}{\out{
}} } \subsection{Returns}{ the subtree spanned by the removed child. } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{node <- Node$new("myroot")$AddChild("mychild")$root node$RemoveChild("mychild") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-RemoveAttribute}{}}} \subsection{Method \code{RemoveAttribute()}}{ Removes attribute called \code{name} from this \code{Node}. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$RemoveAttribute(name, stopIfNotAvailable = TRUE)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{name}}{the name of the attribute to be removed} \item{\code{stopIfNotAvailable}}{If \code{TRUE} (the default), an error is raised when the attribute does not exist.} } \if{html}{\out{
}} } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{node <- Node$new("mynode") node$RemoveAttribute("age", stopIfNotAvailable = FALSE) node$age <- 27 node$RemoveAttribute("age") node } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Sort}{}}} \subsection{Method \code{Sort()}}{ Sort children of a \code{Node} or an entire \code{data.tree} structure \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Sort(attribute, ..., decreasing = FALSE, recursive = TRUE)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{attribute}}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} }} \item{\code{...}}{any parameters to be passed on to the attribute (in case it's a method or a function)} \item{\code{decreasing}}{sort order} \item{\code{recursive}}{if \code{TRUE}, the method will be called recursively on the \code{Node}'s children. This allows sorting an entire tree.} } \if{html}{\out{
}} } \subsection{Details}{ You can sort with respect to any attribute of the nodes in the tree. But note that sorting has side-effects, meaning that you modify the underlying, original data.tree object structure. See also \code{\link{Sort}} for the equivalent function. } \subsection{Returns}{ Returns the node on which Sort is called, invisibly. This can be useful to chain Node methods. } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order") Sort(acme, "totalCost", decreasing = FALSE) print(acme, "totalCost") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Revert}{}}} \subsection{Method \code{Revert()}}{ Reverts the sort order of a \code{Node}'s children. See also \code{\link{Revert}} for the equivalent function. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Revert(recursive = TRUE)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{recursive}}{if \code{TRUE}, the method will be called recursively on the \code{Node}'s children. This allows sorting an entire tree.} } \if{html}{\out{
}} } \subsection{Returns}{ returns the Node invisibly (for chaining) } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Prune}{}}} \subsection{Method \code{Prune()}}{ Prunes a tree. Pruning refers to removing entire subtrees. This function has side-effects, it modifies your data.tree structure! See also \code{\link{Prune}} for the equivalent function. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Prune(pruneFun)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{pruneFun}}{allows providing a prune criterion, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} } \if{html}{\out{
}} } \subsection{Returns}{ the number of nodes removed } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum)) Prune(acme, function(x) x$cost > 700000) print(acme, "cost") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Climb}{}}} \subsection{Method \code{Climb()}}{ Climb a tree from parent to children, by provided criteria. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Climb(...)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{...}}{an attribute-value pairlist to be searched. For brevity, you can also provide a character vector to search for names.} \item{\code{node}}{The root \code{\link{Node}} of the tree or subtree to climb} } \if{html}{\out{
}} } \subsection{Details}{ This method lets you climb the tree, from crutch to crutch. On each \code{Node}, the \code{Climb} finds the first child having attribute value equal to the provided argument. See also \code{\link{Climb}} and \code{\link{Navigate}} Climb(node, ...) } \subsection{Returns}{ the \code{Node} having path \code{...}, or \code{NULL} if such a path does not exist } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) #the following are all equivalent Climb(acme, 'IT', 'Outsource') Climb(acme, name = 'IT', name = 'Outsource') Climb(acme, 'IT')$Climb('Outsource') Navigate(acme, path = "IT/Outsource") Climb(acme, name = 'IT') Climb(acme, position = c(2, 1)) #or, equivalent: Climb(acme, position = 2, position = 1) Climb(acme, name = "IT", cost = 250000) tree <- CreateRegularTree(5, 2) tree$Climb(c("1", "1"), position = c(2, 2))$path } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Navigate}{}}} \subsection{Method \code{Navigate()}}{ Navigate to another node by relative path. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Navigate(path)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{path}}{A string or a character vector describing the path to navigate} \item{\code{node}}{The starting \code{\link{Node}} to navigate} } \if{html}{\out{
}} } \subsection{Details}{ The \code{path} is always relative to the \code{Node}. Navigation to the parent is defined by \code{..}, whereas navigation to a child is defined via the child's name. If path is provided as a string, then the navigation steps are separated by '/'. See also \code{\link{Navigate}} and \code{\link{Climb}} } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) Navigate(acme$Research, "../IT/Outsource") Navigate(acme$Research, c("..", "IT", "Outsource")) } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Get}{}}} \subsection{Method \code{Get()}}{ Traverse a Tree and Collect Values \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Get( attribute, ..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL, format = FALSE, inheritFromAncestors = FALSE, simplify = c(TRUE, FALSE, "array", "regular") )}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{attribute}}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g. \code{acme$Get(function(node) node$cost * node$p)} }} \item{\code{...}}{in case the \code{attribute} is a function or a method, the ellipsis is passed to it as additional arguments.} \item{\code{traversal}}{defines the traversal order to be used. This can be \describe{ \item{pre-order}{Go to first child, then to its first child, etc.} \item{post-order}{Go to the first branch's leaf, then to its siblings, and work your way back to the root} \item{in-order}{Go to the first branch's leaf, then to its parent, and only then to the leaf's sibling} \item{level}{Collect root, then level 2, then level 3, etc.} \item{ancestor}{Take a node, then the node's parent, then that node's parent in turn, etc. This ignores the \code{pruneFun} } \item{function}{You can also provide a function, whose sole parameter is a \code{\link{Node}} object. The function is expected to return the node's next node, a list of the node's next nodes, or NULL.} } Read the data.tree vignette for a detailed explanation of these traversal orders.} \item{\code{pruneFun}}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{\code{filterFun}}{allows providing a a filter, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. 
Note that if filter returns \code{FALSE}, then the node will be excluded from the result (but not the entire subtree).} \item{\code{format}}{if \code{FALSE} (the default), no formatting is being used. If \code{TRUE}, then the first formatter (if any) found along the ancestor path is being used for formatting (see \code{\link{SetFormat}}). If \code{format} is a function, then the collected value is passed to that function, and the result is returned.} \item{\code{inheritFromAncestors}}{if \code{TRUE}, then the path above a \code{Node} is searched to get the \code{attribute} in case it is NULL.} \item{\code{simplify}}{same as \code{\link{sapply}}, i.e. TRUE, FALSE or "array". Additionally, you can specify "regular" if each returned value is of length > 1, and equally named. See below for an example.} } \if{html}{\out{
}} } \subsection{Details}{ The \code{Get} method is one of the most important ones of the \code{data.tree} package. It lets you traverse a tree and collect values along the way. Alternatively, you can call a method or a function on each \code{\link{Node}}. See also \code{\link{Get}}, \code{\link{Node}}, \code{\link{Set}}, \code{\link{Do}}, \code{\link{Traverse}} } \subsection{Returns}{ a vector containing the \code{attributes} collected during traversal, in traversal order. \code{NULL} is converted to NA, such that \code{length(Node$Get) == Node$totalCount} } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) acme$Get("level") acme$Get("totalCount") acme$Get(function(node) node$cost * node$p, filterFun = isLeaf) #This is equivalent: nodes <- Traverse(acme, filterFun = isLeaf) Get(nodes, function(node) node$cost * node$p) #simplify = "regular" will preserve names acme$Get(function(x) c(position = x$position, level = x$level), simplify = "regular") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Do}{}}} \subsection{Method \code{Do()}}{ Executes a function on a set of nodes \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Do( fun, ..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL )}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{fun}}{the function to execute. The function is expected to be either a Method, or to take a Node as its first argument} \item{\code{...}}{A name-value mapping of node attributes} \item{\code{traversal}}{defines the traversal order to be used. This can be \describe{ \item{pre-order}{Go to first child, then to its first child, etc.} \item{post-order}{Go to the first branch's leaf, then to its siblings, and work your way back to the root} \item{in-order}{Go to the first branch's leaf, then to its parent, and only then to the leaf's sibling} \item{level}{Collect root, then level 2, then level 3, etc.} \item{ancestor}{Take a node, then the node's parent, then that node's parent in turn, etc. This ignores the \code{pruneFun} } \item{function}{You can also provide a function, whose sole parameter is a \code{\link{Node}} object. The function is expected to return the node's next node, a list of the node's next nodes, or NULL.} } Read the data.tree vignette for a detailed explanation of these traversal orders.} \item{\code{pruneFun}}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{\code{filterFun}}{allows providing a a filter, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. Note that if filter returns \code{FALSE}, then the node will be excluded from the result (but not the entire subtree).} } \if{html}{\out{
}} } \subsection{Details}{ See also \code{\link{Node}}, \code{\link{Get}}, \code{\link{Set}}, \code{\link{Traverse}} } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) acme$Do(function(node) node$expectedCost <- node$p * node$cost) print(acme, "expectedCost") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-Set}{}}} \subsection{Method \code{Set()}}{ Traverse a Tree and Assign Values \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$Set( ..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL )}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{...}}{each argument can be a vector of values to be assigned. Recycled.} \item{\code{traversal}}{defines the traversal order to be used. This can be \describe{ \item{pre-order}{Go to first child, then to its first child, etc.} \item{post-order}{Go to the first branch's leaf, then to its siblings, and work your way back to the root} \item{in-order}{Go to the first branch's leaf, then to its parent, and only then to the leaf's sibling} \item{level}{Collect root, then level 2, then level 3, etc.} \item{ancestor}{Take a node, then the node's parent, then that node's parent in turn, etc. This ignores the \code{pruneFun} } \item{function}{You can also provide a function, whose sole parameter is a \code{\link{Node}} object. The function is expected to return the node's next node, a list of the node's next nodes, or NULL.} } Read the data.tree vignette for a detailed explanation of these traversal orders.} \item{\code{pruneFun}}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{\code{filterFun}}{allows providing a a filter, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. Note that if filter returns \code{FALSE}, then the node will be excluded from the result (but not the entire subtree).} } \if{html}{\out{
}} } \subsection{Details}{ The method takes one or more vectors as an argument. It traverses the tree, whereby the values are picked from the vector. Also available as OO-style method on \code{\link{Node}}. See also \code{\link{Node}}, \code{\link{Get}}, \code{\link{Do}}, \code{\link{Traverse}} } \subsection{Returns}{ invisibly returns the nodes (useful for chaining) } \subsection{Examples}{ \if{html}{\out{
}} \preformatted{data(acme) acme$Set(departmentId = 1:acme$totalCount, openingHours = NULL, traversal = "post-order") acme$Set(head = c("Jack Brown", "Mona Moneyhead", "Dr. Frank N. Stein", "Eric Nerdahl" ), filterFun = function(x) !x$isLeaf ) print(acme, "departmentId", "head") } \if{html}{\out{
}} } } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Node-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Node$clone(deep = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{deep}}{Whether to make a deep clone.} } \if{html}{\out{
}} } } } ================================================ FILE: man/Prune.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_sideeffect.R \name{Prune} \alias{Prune} \title{Prunes a tree.} \usage{ Prune(node, pruneFun) } \arguments{ \item{node}{The root of the sub-tree to be pruned} \item{pruneFun}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} } \value{ the number of nodes removed } \description{ Pruning refers to removing entire subtrees. This function has side-effects, it modifies your data.tree structure! } \examples{ data(acme) acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum)) Prune(acme, function(x) x$cost > 700000) print(acme, "cost") } \seealso{ \code{\link{Node}} } ================================================ FILE: man/Revert.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_sideeffect.R \name{Revert} \alias{Revert} \title{Reverts the sort order of a \code{Node}'s children.} \usage{ Revert(node, recursive = TRUE) } \arguments{ \item{node}{the Node whose childrens' sort order is to be reverted} \item{recursive}{If \code{TRUE}, then revert is called recursively on all children.} } \value{ returns the Node invisibly (for chaining) } \description{ Reverts the sort order of a \code{Node}'s children. 
} \seealso{ \code{\link{Node}} \code{\link{Sort}} } ================================================ FILE: man/Set.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_traversal.R \name{Set} \alias{Set} \title{Traverse a Tree and Assign Values} \usage{ #OO-style: # node$Set(..., # traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), # pruneFun = NULL, # filterFun = NULL) #traditional: Set(nodes, ...) } \arguments{ \item{nodes}{The nodes on which to perform the Get (typically obtained via \code{\link{Traverse}})} \item{...}{each argument can be a vector of values to be assigned. Recycled.} } \value{ invisibly returns the nodes (useful for chaining) } \description{ The method takes one or more vectors as an argument. It traverses the tree, whereby the values are picked from the vector. Also available as OO-style method on \code{\link{Node}}. } \examples{ data(acme) acme$Set(departmentId = 1:acme$totalCount, openingHours = NULL, traversal = "post-order") acme$Set(head = c("Jack Brown", "Mona Moneyhead", "Dr. Frank N. Stein", "Eric Nerdahl" ), filterFun = function(x) !x$isLeaf ) print(acme, "departmentId", "head") } \seealso{ \code{\link{Node}} \code{\link{Get}} \code{\link{Do}} \code{\link{Traverse}} } ================================================ FILE: man/SetFormat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{SetFormat} \alias{SetFormat} \title{Set a formatter function on a specific node} \usage{ SetFormat(node, name, formatFun) } \arguments{ \item{node}{The node on which to set the formatter} \item{name}{The attribute name for which to set the formatter} \item{formatFun}{The formatter, i.e. 
a function taking a value as an input, and returning the formatted value} } \description{ Formatter functions set on a Node act as a default formatter when printing and using the \code{\link{Get}} method. The formatter is inherited, meaning that whenever \code{Get} fetches an attribute from a \code{Node}, it checks on the \code{Node} or on any of its ancestors whether a formatter is set. } \examples{ data(acme) acme$Set(id = 1:(acme$totalCount)) SetFormat(acme, "id", function(x) FormatPercent(x, digits = 0)) SetFormat(Climb(acme, "IT"), "id", FormatFixedDecimal) print(acme, "id") # Calling Get with an explicit formatter will overwrite the default set on the Node: print(acme, id = acme$Get("id", format = function(x) paste0("id:", x))) # Or, to avoid formatters, even though you set them on a Node: print(acme, id = acme$Get("id", format = identity)) } \seealso{ Get print.Node } ================================================ FILE: man/Sort.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_sideeffect.R \name{Sort} \alias{Sort} \title{Sort children of a \code{Node} or an entire \code{data.tree} structure} \usage{ Sort(node, attribute, ..., decreasing = FALSE, recursive = TRUE) } \arguments{ \item{node}{The node whose children are to be sorted} \item{attribute}{determines what is collected. The \code{attribute} can be \itemize{ \item a.) the name of a \bold{field} or a \bold{property/active} of each \code{Node} in the tree, e.g. \code{acme$Get("p")} or \code{acme$Get("position")} \item b.) the name of a \bold{method} of each \code{Node} in the tree, e.g. \code{acme$Get("levelZeroBased")}, where e.g. \code{acme$levelZeroBased <- function() acme$level - 1} \item c.) a \bold{function}, whose first argument must be a \code{Node} e.g.
\code{acme$Get(function(node) node$cost * node$p)} }} \item{...}{any parameters to be passed on to the attribute (in case it's a method or a function)} \item{decreasing}{sort order} \item{recursive}{if \code{TRUE}, Sort will be called recursively on the \code{Node}'s children. This allows sorting an entire tree.} } \value{ Returns the node on which Sort is called, invisibly. This can be useful to chain Node methods. } \description{ You can sort with respect to any argument of the tree. But note that sorting has side-effects, meaning that you modify the underlying, original data.tree object structure. } \examples{ data(acme) acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order") Sort(acme, "totalCost", decreasing = FALSE) print(acme, "totalCost") } \seealso{ \code{\link{Node}} \code{\link{Revert}} } ================================================ FILE: man/ToDiagrammeRGraph.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_plot.R, R/util.R \name{plot.Node} \alias{plot.Node} \alias{ToDiagrammeRGraph} \alias{SetNodeStyle} \alias{SetEdgeStyle} \alias{SetGraphStyle} \alias{GetDefaultTooltip} \title{Plot a graph, or get a graphviz dot representation of the tree} \usage{ \method{plot}{Node}( x, ..., direction = c("climb", "descend"), pruneFun = NULL, output = "graph" ) ToDiagrammeRGraph(root, direction = c("climb", "descend"), pruneFun = NULL) SetNodeStyle(node, inherit = TRUE, keepExisting = FALSE, ...) SetEdgeStyle(node, inherit = TRUE, keepExisting = FALSE, ...) SetGraphStyle(root, keepExisting = FALSE, ...) GetDefaultTooltip(node) } \arguments{ \item{x}{The root node of the data.tree structure to plot} \item{...}{For the SetStyle methods, this can be any styleName / value pair. See http://graphviz.org/Documentation.php for details.
For the plot.Node generic method, this is not used.} \item{direction}{when converting to a network, should the edges point from root to children ("climb") or from child to parent ("descend")?} \item{pruneFun}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{output}{A string specifying the output type; \code{graph} (the default) renders the graph using the \code{\link[DiagrammeR:grViz]{grViz()}} function and \code{visNetwork} renders the graph using the \code{\link[DiagrammeR:visnetwork]{visnetwork()}} function.} \item{root}{The root \code{\link{Node}} of the data.tree structure to visualize.} \item{node}{The \code{\link{Node}} of the data.tree structure on which you would like to set style attributes.} \item{inherit}{If TRUE, then children will inherit this node's style. Otherwise they inherit from this node's parent. Note that the inherit always applies to the node, i.e. all style attributes of a node and not to a single style attribute.} \item{keepExisting}{If TRUE, then style attributes are added to possibly existing style attributes on the node.} } \description{ Use these methods to style your graph, and to plot it. The functionality is built around the DiagrammeR package, so for anything that goes beyond simple plotting, it is recommended to read its documentation at http://rich-iannone.github.io/DiagrammeR/docs.html. Note that DiagrammeR is only suggested by data.tree, so `plot` only works if you have installed it on your system. } \details{ Use \code{SetNodeStyle} and \code{SetEdgeStyle} to define the style of your plot. Use \code{plot} to display a graphical representation of your tree. 
The most common styles that can be set on the nodes are: \itemize{ \item{\code{color}} \item{\code{fillcolor}} \item{\code{fixedsize} true or false} \item{\code{fontcolor}} \item{\code{fontname}} \item{\code{fontsize}} \item{\code{height}} \item{\code{penwidth}} \item{\code{shape} box, ellipse, polygon, circle, box, etc.} \item{\code{style}} \item{\code{tooltip}} \item{\code{width}} } The most common styles that can be set on the edges are: \itemize{ \item{\code{arrowhead} e.g. normal, dot, vee} \item{\code{arrowsize}} \item{\code{arrowtail}} \item{\code{color}} \item{\code{dir} forward, back, both, none} \item{\code{fontcolor}} \item{\code{fontname}} \item{\code{fontsize}} \item{\code{headport}} \item{\code{label}} \item{\code{minlen}} \item{\code{penwidth}} \item{\code{tailport}} \item{\code{tooltip}} } A good source to understand the attributes is http://graphviz.org/Documentation.php. Another good source is the DiagrammeR package documentation, or more specifically: http://rich-iannone.github.io/DiagrammeR/docs.html In addition to the standard GraphViz functionality, the \code{data.tree} plotting infrastructure takes advantage of the fact that data.tree structure are always hierarchic. Thus, style attributes are inherited from parents to children on an individual basis. For example, you can set the fontcolor to red on a parent, and then all children will also have red font, except if you specifically disallow inheritance. Labels and tooltips are never inherited. Another feature concerns functions: Instead of setting a fixed value (e.g. \code{SetNodeStyle(acme, label = "Acme. Inc"}), you can set a function (e.g. \code{SetNodeStyle(acme, label = function(x) x$name)}). The function must take a \code{\link{Node}} as its single argument. Together with inheritance, this becomes a very powerful tool. The \code{GetDefaultTooltip} method is a utility method that can be used to print all attributes of a \code{\link{Node}}. 
There are some more examples in the 'applications' vignette, see \code{vignette('applications', package = "data.tree")} } \examples{ data(acme) SetGraphStyle(acme, rankdir = "TB") SetEdgeStyle(acme, arrowhead = "vee", color = "blue", penwidth = 2) #per default, Node style attributes will be inherited: SetNodeStyle(acme, style = "filled,rounded", shape = "box", fillcolor = "GreenYellow", fontname = "helvetica", tooltip = GetDefaultTooltip) SetNodeStyle(acme$IT, fillcolor = "LightBlue", penwidth = "5px") #inheritance can be avoided: SetNodeStyle(acme$Accounting, inherit = FALSE, fillcolor = "Thistle", fontcolor = "Firebrick", tooltip = "This is the accounting department") SetEdgeStyle(acme$Research$`New Labs`, color = "red", label = "Focus!", penwidth = 3, fontcolor = "red") #use Do to set style on specific nodes: Do(acme$leaves, function(node) SetNodeStyle(node, shape = "egg")) plot(acme) #print p as label, where available: SetNodeStyle(acme, label = function(node) node$p) plot(acme) } ================================================ FILE: man/ToNewick.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion.R \name{ToNewick} \alias{ToNewick} \title{Write a \code{data.tree} structure to Newick notation} \usage{ ToNewick(node, heightAttribute = DefaultPlotHeight, ...) } \arguments{ \item{node}{The root \code{Node} of a tree or sub-tree to be converted} \item{heightAttribute}{The attribute (field name, method, or function) storing or calculating the height for each \code{Node}} \item{...}{parameters that will be passed on to the heightAttribute, in case it is a function} } \description{ To read from Newick, you can use the \code{ape} package, and convert the resulting \code{phylo} object to a \code{data.tree} structure.
} \examples{ data(acme) ToNewick(acme) ToNewick(acme, heightAttribute = NULL) ToNewick(acme, heightAttribute = function(x) DefaultPlotHeight(x, 200)) ToNewick(acme, rootHeight = 200) } \seealso{ Other Conversions from Node: \code{\link{as.dendrogram.Node}()} } \concept{Conversions from Node} \keyword{Newick} ================================================ FILE: man/Traverse.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods_traversal.R \name{Traverse} \alias{Traverse} \title{Traverse a tree or a sub-tree} \usage{ Traverse( node, traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL ) } \arguments{ \item{node}{the root of a tree or a sub-tree that should be traversed} \item{traversal}{any of 'pre-order' (the default), 'post-order', 'in-order', 'level', 'ancestor', or a custom function (see details)} \item{pruneFun}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{filterFun}{allows providing a a filter, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. Note that if filter returns \code{FALSE}, then the node will be excluded from the result (but not the entire subtree).} } \value{ a list of \code{Node}s } \description{ Traverse takes the root of a tree or a sub-tree, and "walks" the tree in a specific order. It returns a list of \code{\link{Node}} objects, filtered and pruned by \code{filterFun} and \code{pruneFun}. } \details{ The traversal order is as follows. (Note that these descriptions are not precise and complete. They are meant for quick reference only. See the data.tree vignette for a more detailed description). 
\describe{ \item{pre-order}{Go to first child, then to its first child, etc.} \item{post-order}{Go to the first branch's leaf, then to its siblings, and work your way back to the root} \item{in-order}{Go to the first branch's leaf, then to its parent, and only then to the leaf's sibling} \item{level}{Collect root, then level 2, then level 3, etc.} \item{ancestor}{Take a node, then the node's parent, then that node's parent in turn, etc. This ignores the \code{pruneFun} } \item{function}{You can also provide a function, whose sole parameter is a \code{\link{Node}} object. The function is expected to return the node's next node, a list of the node's next nodes, or NULL.} } } \seealso{ \code{\link{Node}} \code{\link{Get}} \code{\link{Set}} \code{\link{Do}} } ================================================ FILE: man/acme.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_doc.R \docType{data} \name{acme} \alias{acme} \title{Sample Data: A Simple Company with Departments} \format{ A data.tree root Node } \usage{ data(acme) } \description{ acme's tree representation is accessed through its root, acme. } \details{ \itemize{ \item cost, only available for leaf nodes. Cost of the project. \item p probability that a project will be undertaken. } } \keyword{datasets} ================================================ FILE: man/as.Node.BinaryTree.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_party.R \name{as.Node.BinaryTree} \alias{as.Node.BinaryTree} \title{Convert a \code{SplitNode} from the party package to a \code{data.tree} structure.} \usage{ \method{as.Node}{BinaryTree}(x, ...) } \arguments{ \item{x}{The BinaryTree} \item{...}{additional arguments (unused)} } \description{ Convert a \code{SplitNode} from the party package to a \code{data.tree} structure.
} \examples{ library(party) airq <- subset(airquality, !is.na(Ozone)) airct <- ctree(Ozone ~ ., data = airq, controls = ctree_control(maxsurrogate = 3)) tree <- as.Node(airct) tree print(tree, "label", criterion = function(x) round(x$criterion$maxcriterion, 3), statistic = function(x) round(max(x$criterion$statistic), 3) ) FindNode(tree, 6)$path } ================================================ FILE: man/as.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion.R \name{as.Node} \alias{as.Node} \title{Convert an object to a \code{data.tree} data structure} \usage{ as.Node(x, ...) } \arguments{ \item{x}{The object to be converted} \item{...}{Additional arguments} } \description{ Convert an object to a \code{data.tree} data structure } \seealso{ Other as.Node: \code{\link{as.Node.data.frame}()}, \code{\link{as.Node.dendrogram}()}, \code{\link{as.Node.list}()}, \code{\link{as.Node.phylo}()}, \code{\link{as.Node.rpart}()} } \concept{as.Node} ================================================ FILE: man/as.Node.data.frame.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_dataframe.R \name{as.Node.data.frame} \alias{as.Node.data.frame} \alias{FromDataFrameTable} \alias{FromDataFrameNetwork} \title{Convert a \code{data.frame} to a \code{data.tree} structure} \usage{ \method{as.Node}{data.frame}( x, ..., mode = c("table", "network"), pathName = "pathString", pathDelimiter = "/", colLevels = NULL, na.rm = TRUE ) FromDataFrameTable( table, pathName = "pathString", pathDelimiter = "/", colLevels = NULL, na.rm = TRUE, check = c("check", "no-warn", "no-check") ) FromDataFrameNetwork(network, check = c("check", "no-warn", "no-check")) } \arguments{ \item{x}{The data.frame in the required format.} \item{...}{Any other argument implementations of this might need} \item{mode}{Either "table" (if x 
is a data.frame in tree or table format) or "network"} \item{pathName}{The name of the column in x containing the path of the row} \item{pathDelimiter}{The delimiter used to separate nodes in \code{pathName}} \item{colLevels}{Nested list of column names, determining on what node levels the attributes are written to.} \item{na.rm}{If \code{TRUE}, then NA's are treated as NULL and values will not be set on nodes} \item{table}{a \code{data.frame} in table or tree format, i.e. having a row for each leaf (and optionally for additional nodes). There should be a column called \code{pathName}, separated by \code{pathDelimiter}, describing the path of each row.} \item{check}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} }} \item{network}{A \code{data.frame} in network format, i.e. it must adhere to the following requirements: \itemize{ \item{It must contain as many rows as there are nodes (excluding the root, there is no row for the root)} \item{Its first and second columns contain the network relationships. 
This can be either climbing (from parent to children) or descending (from child to parent)} \item{Its subsequent columns contain the attributes to be set on the nodes} \item{It must contain a single root} \item{There are no cycles in the network} }} } \value{ The root \code{Node} of the \code{data.tree} structure } \description{ Convert a \code{data.frame} to a \code{data.tree} structure } \examples{ data(acme) #Tree x <- ToDataFrameTree(acme, "pathString", "p", "cost") x xN <- as.Node(x) print(xN, "p", "cost") #Table x <- ToDataFrameTable(acme, "pathString", "p", "cost") x xN <- FromDataFrameTable(x) print(xN, "p", "cost") #More complex Table structure, using colLevels acme$Set(floor = c(1, 2, 3), filterFun = function(x) x$level == 2) x <- ToDataFrameTable(acme, "pathString", "floor", "p", "cost") x xN <- FromDataFrameTable(x, colLevels = list(NULL, "floor", c("p", "cost")), na.rm = TRUE) print(xN, "floor", "p", "cost") #Network x <- ToDataFrameNetwork(acme, "p", "cost", direction = "climb") x xN <- FromDataFrameNetwork(x) print(xN, "p", "cost") } \seealso{ \code{\link{as.data.frame.Node}} Other as.Node: \code{\link{as.Node.dendrogram}()}, \code{\link{as.Node.list}()}, \code{\link{as.Node.phylo}()}, \code{\link{as.Node.rpart}()}, \code{\link{as.Node}()} } \concept{as.Node} ================================================ FILE: man/as.Node.dendrogram.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_dendrogram.R \name{as.Node.dendrogram} \alias{as.Node.dendrogram} \title{Convert a \code{\link{dendrogram}} to a data.tree \code{Node}} \usage{ \method{as.Node}{dendrogram}( x, name = "Root", heightName = "plotHeight", check = c("check", "no-warn", "no-check"), ... 
) } \arguments{ \item{x}{The dendrogram} \item{name}{The name of the root Node} \item{heightName}{The name under which the dendrogram's height is stored} \item{check}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors} }} \item{...}{Additional parameters} } \value{ The root \code{Node} of a \code{data.tree} } \description{ Convert a \code{\link{dendrogram}} to a data.tree \code{Node} } \examples{ hc <- hclust(dist(USArrests), "ave") dend1 <- as.dendrogram(hc) tree1 <- as.Node(dend1) tree1$attributesAll tree1$totalCount tree1$leafCount tree1$height } \seealso{ Other as.Node: \code{\link{as.Node.data.frame}()}, \code{\link{as.Node.list}()}, \code{\link{as.Node.phylo}()}, \code{\link{as.Node.rpart}()}, \code{\link{as.Node}()} } \concept{as.Node} ================================================ FILE: man/as.Node.list.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_list.R \name{as.Node.list} \alias{as.Node.list} \alias{FromListExplicit} \alias{FromListSimple} \title{Convert a nested \code{list} structure to a \code{data.tree} structure} \usage{ \method{as.Node}{list}( x, mode = c("simple", "explicit"), nameName = "name", childrenName = "children", nodeName = NULL, interpretNullAsList = FALSE, check = c("check", "no-warn", "no-check"), ... 
) FromListExplicit( explicitList, nameName = "name", childrenName = "children", nodeName = NULL, check = c("check", "no-warn", "no-check") ) FromListSimple( simpleList, nameName = "name", nodeName = NULL, interpretNullAsList = FALSE, check = c("check", "no-warn", "no-check") ) } \arguments{ \item{x}{The \code{list} to be converted.} \item{mode}{How the list is structured. "simple" (the default) will interpret any list to be a child. "explicit" assumes that children are in a nested list called \code{childrenName}} \item{nameName}{The name of the element in the list that should be used as the name, can be NULL if mode = explicit and the children lists are named, or if an automatic name (running number) should be assigned} \item{childrenName}{The name of the element that contains the child list (applies to mode 'explicit' only).} \item{nodeName}{A name suggestion for x, if the name cannot be inferred otherwise. This is for example the case for the root with mode explicit and named lists.} \item{interpretNullAsList}{If \code{TRUE}, then \code{NULL}-valued lists are interpreted as child nodes. Else, they are interpreted as attributes. This only has an effect if \code{mode} is "simple".} \item{check}{Either \itemize{ \item{\code{"check"}: if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)} \item{\code{"no-warn"}: if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)} \item{\code{"no-check" or FALSE}: if the name conformance should not be checked; use this if performance is critical.
However, in case of non-conformance, expect cryptic follow-up errors} }} \item{...}{Any other argument to be passed to generic sub implementations} \item{explicitList}{A \code{list} in which children are in a separate nested list called \code{childrenName}.} \item{simpleList}{A \code{list} in which children are stored as nested lists alongside other attributes. Any list is interpreted as a child \code{Node}} } \description{ Convert a nested \code{list} structure to a \code{data.tree} structure } \examples{ kingJosephs <- list(name = "Joseph I", spouse = "Mary", born = "1818-02-23", died = "1839-08-29", children = list( list(name = "Joseph II", spouse = "Kathryn", born = "1839-03-28", died = "1865-12-19"), list(name = "Helen", born = "1840-17-08", died = "1845-01-01") ) ) FromListExplicit(kingJosephs) kingJosephs <- list(head = "Joseph I", spouse = "Mary", born = "1818-02-23", died = "1839-08-29", list(head = "Joseph II", spouse = "Kathryn", born = "1839-03-28", died = "1865-12-19"), list(head = "Helen", born = "1840-17-08", died = "1845-01-01") ) FromListSimple(kingJosephs, nameName = "head") kingJosephs <- list(spouse = "Mary", born = "1818-02-23", died = "1839-08-29", `Joseph II` = list(spouse = "Kathryn", born = "1839-03-28", died = "1865-12-19"), Helen = list(born = "1840-17-08", died = "1845-01-01") ) FromListSimple(kingJosephs, nodeName = "Joseph I") } \seealso{ Other as.Node: \code{\link{as.Node.data.frame}()}, \code{\link{as.Node.dendrogram}()}, \code{\link{as.Node.phylo}()}, \code{\link{as.Node.rpart}()}, \code{\link{as.Node}()} } \concept{as.Node} ================================================ FILE: man/as.Node.party.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_party.R \name{as.Node.party} \alias{as.Node.party} \title{Convert a \code{party} from the partykit package to a \code{data.tree} structure.} \usage{ \method{as.Node}{party}(x, ...)
} \arguments{ \item{x}{The party object} \item{...}{other arguments (unused)} } \description{ Convert a \code{party} from the partykit package to a \code{data.tree} structure. } \examples{ library(partykit) data("WeatherPlay", package = "partykit") ### splits ### # split in overcast, humidity, and windy sp_o <- partysplit(1L, index = 1:3) sp_h <- partysplit(3L, breaks = 75) sp_w <- partysplit(4L, index = 1:2) ## query labels character_split(sp_o) ### nodes ### ## set up partynode structure pn <- partynode(1L, split = sp_o, kids = list( partynode(2L, split = sp_h, kids = list( partynode(3L, info = "yes"), partynode(4L, info = "no"))), partynode(5L, info = "yes"), partynode(6L, split = sp_w, kids = list( partynode(7L, info = "yes"), partynode(8L, info = "no"))))) pn ### tree ### ## party: associate recursive partynode structure with data py <- party(pn, WeatherPlay) tree <- as.Node(py) print(tree, "splitname", count = function(node) nrow(node$data), "splitLevel") SetNodeStyle(tree, label = function(node) paste0(node$name, ": ", node$splitname), tooltip = function(node) paste0(nrow(node$data), " observations"), fontname = "helvetica") SetEdgeStyle(tree, arrowhead = "none", label = function(node) node$splitLevel, fontname = "helvetica", penwidth = function(node) 12 * nrow(node$data)/nrow(node$root$data), color = function(node) { paste0("grey", 100 - as.integer( 100 * nrow(node$data)/nrow(node$root$data)) ) } ) Do(tree$leaves, function(node) { SetNodeStyle(node, shape = "box", color = ifelse(node$splitname == "yes", "darkolivegreen4", "lightsalmon4"), fillcolor = ifelse(node$splitname == "yes", "darkolivegreen1", "lightsalmon"), style = "filled,rounded", penwidth = 2 ) } ) plot(tree) } ================================================ FILE: man/as.Node.phylo.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_ape.R \name{as.Node.phylo} \alias{as.Node.phylo} \title{Convert a
\code{phylo} object from the ape package to a \code{Node}} \usage{ \method{as.Node}{phylo}( x, heightName = "plotHeight", replaceUnderscores = TRUE, namesNotUnique = FALSE, ... ) } \arguments{ \item{x}{The phylo object to be converted} \item{heightName}{If the phylo contains edge lengths, then they will be converted to a height and stored in a field named according to this parameter (the default is "plotHeight")} \item{replaceUnderscores}{if TRUE (the default), then underscores in names are replaced with spaces} \item{namesNotUnique}{if TRUE, then the \code{name} of the \code{Node}s will be prefixed with a unique id. This is useful if the children of a parent have non-unique names.} \item{...}{any other parameter to be passed to sub-implementations} } \description{ Convert a \code{phylo} object from the ape package to a \code{Node} } \examples{ #which bird families have the max height? library(ape) data(bird.families) bf <- as.Node(bird.families) height <- bf$height t <- Traverse(bf, filterFun = function(x) x$level == 25) Get(t, "name") } \seealso{ Other ape phylo conversions: \code{\link{GetPhyloNr}()}, \code{\link{as.phylo.Node}()} Other as.Node: \code{\link{as.Node.data.frame}()}, \code{\link{as.Node.dendrogram}()}, \code{\link{as.Node.list}()}, \code{\link{as.Node.rpart}()}, \code{\link{as.Node}()} } \concept{ape phylo conversions} \concept{as.Node} ================================================ FILE: man/as.Node.rpart.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_rpart.R \name{as.Node.rpart} \alias{as.Node.rpart} \title{Convert an \code{\link{rpart}} object to a \code{data.tree} structure} \usage{ \method{as.Node}{rpart}(x, digits = getOption("digits") - 3, use.n = FALSE, ...) } \arguments{ \item{x}{the \code{rpart} object to be converted} \item{digits}{the number of digits to be used for numeric values in labels} \item{use.n}{logical.
Add cases to labels, see \code{\link{text.rpart}} for further information} \item{...}{any other argument to be passed to generic sub implementations} } \value{ a \code{data.tree} object. The tree contains a field \code{rpart.id} which references back to the original node id in the row names of the \code{rpart} object. } \description{ Convert an \code{\link{rpart}} object to a \code{data.tree} structure } \examples{ if (require(rpart)) { fit <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis) as.Node(fit) } } \seealso{ Other as.Node: \code{\link{as.Node.data.frame}()}, \code{\link{as.Node.dendrogram}()}, \code{\link{as.Node.list}()}, \code{\link{as.Node.phylo}()}, \code{\link{as.Node}()} } \concept{as.Node} ================================================ FILE: man/as.data.frame.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_dataframe.R \name{as.data.frame.Node} \alias{as.data.frame.Node} \alias{ToDataFrameTree} \alias{ToDataFrameTable} \alias{ToDataFrameNetwork} \alias{ToDataFrameTypeCol} \title{Convert a \code{data.tree} structure to a \code{data.frame}} \usage{ \method{as.data.frame}{Node}( x, row.names = NULL, optional = FALSE, ..., traversal = c("pre-order", "post-order", "in-order", "level", "ancestor"), pruneFun = NULL, filterFun = NULL, format = FALSE, inheritFromAncestors = FALSE ) ToDataFrameTree(x, ..., pruneFun = NULL) ToDataFrameTable(x, ..., pruneFun = NULL) ToDataFrameNetwork( x, ..., direction = c("climb", "descend"), pruneFun = NULL, format = FALSE, inheritFromAncestors = FALSE ) ToDataFrameTypeCol(x, ..., type = "level", prefix = type, pruneFun = NULL) } \arguments{ \item{x}{The root \code{Node} of the tree or sub-tree to be converted to a data.frame} \item{row.names}{\code{NULL} or a character vector giving the row names for the data frame. Missing values are not allowed.} \item{optional}{logical.
If \code{TRUE}, setting row names and converting column names (to syntactic names: see make.names) is optional.} \item{...}{the attributes to be added as columns of the data.frame. See \code{\link{Get}} for details. If a specific Node does not contain the attribute, \code{NA} is added to the data.frame.} \item{traversal}{any of 'pre-order' (the default), 'post-order', 'in-order', 'level', or 'ancestor'. See \code{\link{Traverse}} for details.} \item{pruneFun}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{filterFun}{a function taking a \code{Node} as an argument. See \code{\link{Traverse}} for details.} \item{format}{if \code{FALSE} (the default), then no formatting will be applied. If \code{TRUE}, then the first formatter (if any) along the ancestor path is used for formatting.} \item{inheritFromAncestors}{if FALSE, and if the attribute is a field or a method, then only a \code{Node} itself is searched for the field/method. If TRUE, and if the \code{Node} does not contain the attribute, then ancestors are also searched.} \item{direction}{when converting to a network, should the edges point from root to children ("climb") or from child to parent ("descend")?} \item{type}{when converting type columns, the \code{type} is the discriminator, i.e. an attribute (e.g. field name) of each node} \item{prefix}{when converting type columns, the prefix used for the column names. Can be NULL to omit prefixes.} } \value{ ToDataFrameTree: a \code{data.frame}, where each row represents a \code{Node} in the tree or sub-tree spanned by \code{x}, possibly pruned according to \code{pruneFun}. ToDataFrameTable: a \code{data.frame}, where each row represents a leaf \code{Node} in the tree or sub-tree spanned by \code{x}, possibly pruned according to \code{pruneFun}. 
ToDataFrameNetwork: a \code{data.frame}, where each row represents a \code{Node} in the tree or sub-tree spanned by \code{x}, possibly pruned according to \code{pruneFun}. The first column is called 'from', while the second is called 'to', describing the parent to child edge (for direction "climb") or the child to parent edge (for direction "descend"). If \code{\link{AreNamesUnique}} is TRUE, then the Network is based on the \code{Node$name}, otherwise on the \code{Node$pathString} ToDataFrameTypeCol: a \code{data.frame} in table format (i.e. where each row represents a leaf in the tree or sub-tree spanned by \code{x}), possibly pruned according to \code{pruneFun}. In addition to \code{...}, each distinct \code{type} is output to a column. } \description{ If a node field contains data of length > 1, then that is converted into a string in the data.frame. } \examples{ data(acme) acme$attributesAll as.data.frame(acme, row.names = NULL, optional = FALSE, "cost", "p") ToDataFrameTree(acme, "cost", "p") ToDataFrameNetwork(acme, "cost", "p", direction = "climb") ToDataFrameTable(acme, "cost", "p") ToDataFrameTypeCol(acme) #use the pruneFun: acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum), traversal = "post-order") ToDataFrameTree(acme, "totalCost", pruneFun = function(x) x$totalCost > 300000) #inherit acme$Set(floor = c(1, 2, 3), filterFun = function(x) x$level == 2) as.data.frame(acme, row.names = NULL, optional = FALSE, "floor", inheritFromAncestors = FALSE) as.data.frame(acme, row.names = NULL, optional = FALSE, "floor", inheritFromAncestors = TRUE) #using a function as an attribute: acme$Accounting$Head <- "Mrs. Numright" acme$Research$Head <- "Mr. Stein" acme$IT$Head <- "Mr. 
Squarehead" ToDataFrameTable(acme, department = function(x) x$parent$name, "name", "Head", "cost") #complex TypeCol acme$IT$Outsource$AddChild("India") acme$IT$Outsource$AddChild("Poland") acme$Set(type = c('company', 'department', 'project', 'project', 'department', 'project', 'project', 'department', 'program', 'project', 'project', 'project', 'project' ) ) print(acme, 'type') ToDataFrameTypeCol(acme, type = 'type') } ================================================ FILE: man/as.dendrogram.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_dendrogram.R \name{as.dendrogram.Node} \alias{as.dendrogram.Node} \title{Convert a \code{Node} to a \code{dendrogram}} \usage{ \method{as.dendrogram}{Node}( object, heightAttribute = DefaultPlotHeight, edgetext = FALSE, ... ) } \arguments{ \item{object}{The Node to convert} \item{heightAttribute}{The attribute (field name or function) storing the height} \item{edgetext}{If TRUE, then for non-leaf nodes the node name is stored as the dendrogram's edge text.} \item{...}{Additional parameters} } \value{ An object of class dendrogram } \description{ Convert a \code{data.tree} structure to a \code{\link{dendrogram}} } \examples{ data(acme) acmed <- as.dendrogram(acme) plot(acmed, center = TRUE) #you can take an attribute for the height: acme$Do( function(x) x$myPlotHeight <- (10 - x$level)) acmed <- as.dendrogram(acme, heightAttribute = "myPlotHeight") plot(acmed, center = TRUE) #or directly a function acmed <- as.dendrogram(acme, heightAttribute = function(x) 10 - x$level) plot(acmed) } \seealso{ Other Conversions from Node: \code{\link{ToNewick}()} } \concept{Conversions from Node} ================================================ FILE: man/as.igraph.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_igraph.R \name{as.igraph.Node}
\alias{as.igraph.Node} \title{Convert a \code{data.tree} structure to an igraph network} \usage{ as.igraph.Node( x, vertexAttributes = character(), edgeAttributes = character(), directed = FALSE, direction = c("climb", "descend"), ... ) } \arguments{ \item{x}{The root \code{Node} to convert} \item{vertexAttributes}{A vector of strings, representing the attributes in the \code{data.tree} structure to add as attributes to the vertices of the igraph} \item{edgeAttributes}{A vector of strings, representing the attributes in the \code{data.tree} structure to add as edge attributes of the igraph} \item{directed}{Logical scalar, whether or not to create a directed graph.} \item{direction}{when converting to a network, should the edges point from root to children ("climb") or from child to parent ("descend")?} \item{...}{Currently unused.} } \value{ an \code{igraph} object } \description{ This requires the igraph package to be installed. Also, this requires the names of the \code{Nodes} to be unique within the \code{data.tree} structure. } \examples{ data(acme) library(igraph) ig <- as.igraph(acme, "p", c("level", "isLeaf")) plot(ig) } \seealso{ AreNamesUnique } ================================================ FILE: man/as.list.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_list.R \name{as.list.Node} \alias{as.list.Node} \alias{ToListSimple} \alias{ToListExplicit} \title{Convert a \code{data.tree} structure to a list-of-list structure} \usage{ \method{as.list}{Node}( x, mode = c("simple", "explicit"), unname = FALSE, nameName = ifelse(unname, "name", ""), childrenName = "children", rootName = "", keepOnly = NULL, pruneFun = NULL, ... ) ToListSimple(x, nameName = "name", pruneFun = NULL, ...) ToListExplicit( x, unname = FALSE, nameName = ifelse(unname, "name", ""), childrenName = "children", pruneFun = NULL, ... 
) } \arguments{ \item{x}{The Node to convert} \item{mode}{How the list is structured. "simple" (the default) will add children directly as nested lists. "explicit" puts children in a separate nested list called \code{childrenName}} \item{unname}{If TRUE, and if \code{mode} is "explicit", then the nested children list will not have named arguments. This can be useful e.g. in the context of conversion to JSON, if you prefer the children to be an array rather than named objects.} \item{nameName}{The name that should be given to the name element} \item{childrenName}{The name that should be given to the children nested list} \item{rootName}{The name of the node. If provided, this overrides \code{Node$name}} \item{keepOnly}{A character vector of attributes to include in the result. If \code{NULL} (the default), all attributes are kept.} \item{pruneFun}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{...}{Additional parameters passed to \code{as.list.Node}} } \description{ Convert a \code{data.tree} structure to a list-of-list structure } \examples{ data(acme) str(ToListSimple(acme)) str(ToListSimple(acme, keepOnly = "cost")) str(ToListExplicit(acme)) str(ToListExplicit(acme, unname = TRUE)) str(ToListExplicit(acme, unname = TRUE, nameName = "id", childrenName = "descendants")) } ================================================ FILE: man/as.phylo.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_conversion_ape.R \name{as.phylo.Node} \alias{as.phylo.Node} \title{Convert a \code{Node} to a phylo object from the ape package.} \usage{ as.phylo.Node(x, heightAttribute = DefaultPlotHeight, ...) 
} \arguments{ \item{x}{The root \code{Node} of the tree or sub-tree to be converted} \item{heightAttribute}{The attribute (field name or function) storing the height} \item{...}{any other argument} } \description{ This method requires the ape package to be installed and loaded. } \examples{ library(ape) data(acme) acmephylo <- as.phylo(acme) #plot(acmephylo) } \seealso{ Other ape phylo conversions: \code{\link{GetPhyloNr}()}, \code{\link{as.Node.phylo}()} } \concept{ape phylo conversions} ================================================ FILE: man/averageBranchingFactor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_actives.R \name{averageBranchingFactor} \alias{averageBranchingFactor} \title{Calculate the average number of branches each non-leaf has} \usage{ averageBranchingFactor(node) } \arguments{ \item{node}{The node to calculate the average branching factor for} } \description{ Calculate the average number of branches each non-leaf has } ================================================ FILE: man/data.tree.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.tree-package.R \docType{package} \name{data.tree} \alias{data.tree} \alias{data.tree-package} \title{data.tree: Hierarchical Data Structures} \description{ \code{data.tree} is to hierarchical data what \code{data.frame} is to tabular data: An extensible, general purpose structure to store, manipulate, and display hierarchical data. } \section{Introduction}{ Hierarchical data is ubiquitous in statistics and programming (XML, search trees, family trees, classification, file system, etc.). However, no general-use \bold{tree data structure} is available in R. Where tabular data has \code{data.frame}, hierarchical data is often modeled in lists of lists or similar makeshifts. These structures are often difficult to manage. 
This is where the \code{data.tree} package steps in. It lets you build trees of hierarchical data for various uses: to print, to rapidly prototype search algorithms, to test out new classification algorithms, and much more. } \section{Tree Traversal}{ \code{data.tree} allows you to \code{\link{Traverse}} trees in various orders (pre-order, post-order, level, etc.), and it lets you run operations on \code{\link{Node}s} via \code{\link{Do}}. Similarly, you can collect and store data while traversing a tree using the \code{\link{Get}} and the \code{\link{Set}} methods. } \section{Methods}{ The package also contains utility functions to \code{\link{Sort}}, to \code{\link{Prune}}, to \code{\link{Aggregate}} and \code{\link{Cumulate}} and to \code{\link{print}} in custom formats. } \section{Construction and Conversion}{ The package also contains many conversions from and to data.tree structures. Check out the see also section of \code{\link{as.Node}}. You can construct a tree from a \code{data.frame} using \code{\link{as.Node.data.frame}}, and convert it back using \code{\link{as.data.frame.Node}}. Similar options exist for lists of lists. For more specialized conversions, see \code{\link{as.dendrogram.Node}}, \code{\link{as.Node.dendrogram}}, \code{\link{as.phylo.Node}} and \code{\link{as.Node.phylo}}. Finally, easy conversion options from and to list, dataframe, JSON, YAML, igraph, ape, rpart, party and more exist: \itemize{ \item{list: both directions} \item{dataframe: both directions} \item{JSON, YAML: both directions, via lists} \item{igraph: from data.tree to igraph} \item{ape: both directions} \item{rpart: from rpart to data.tree} \item{party: from party to data.tree} } } \section{Node and Reference Semantics}{ The entry point to the package is \code{\link{Node}}. Each tree is composed of a number of \code{Node}s, referencing each other. One of the most important things to note about \code{data.tree} is that it exhibits \bold{reference semantics}.
In a nutshell, this means that you can modify your tree along the way, without having to reassign it to a variable after each modification. By and large, this is a rather exceptional behavior in R, where value-semantics is king most of the time. } \section{Applications}{ \code{data.tree} is not optimised for computational speed, but for implementation speed. Namely, its memory footprint is relatively large compared to traditional R data structures. However, it can easily handle trees with several thousand nodes, and once a tree is constructed, operations on it are relatively fast. data.tree is always useful when \itemize{ \item{you want to develop and test a new algorithm} \item{you want to import and convert tree structures (it imports and exports to list-of-list, data.frame, yaml, json, igraph, dendrogram, phylo and more)} \item{you want to play around with data, display it and get an understanding} \item{you want to test another package, to compare it with your own results} \item{you need to do homework} } For a quick overview of the features, read the \code{\link{data.tree}} vignette by running \code{vignette("data.tree")}. 
For stylized applications, see \code{vignette("applications", package='data.tree')} } \examples{ data(acme) print(acme) acme$attributesAll acme$count acme$totalCount acme$isRoot acme$height print(acme, "p", "cost") outsource <- acme$IT$Outsource class(outsource) print(outsource) outsource$attributes outsource$isLeaf outsource$level outsource$path outsource$p outsource$parent$name outsource$root$name outsource$expCost <- outsource$p * outsource$cost print(acme, "expCost") acme$Get("p") acme$Do(function(x) x$expCost <- x$p * x$cost) acme$Get("expCost", filterFun = isLeaf) ToDataFrameTable(acme, "name", "p", "cost", "level", "pathString") ToDataFrameTree(acme, "name", "p", "cost", "level") ToDataFrameNetwork(acme, "p", "cost") } \seealso{ \code{\link{Node}} For more details, see the \code{data.tree} vignette by running: \code{vignette("data.tree")} } \author{ \strong{Maintainer}: Christoph Glur \email{christoph.glur@powerpartners.pro} (R interface) Other contributors: \itemize{ \item Russ Hyde (improve dependencies) [contributor] \item Chris Hammill (improve getting) [contributor] \item Facundo Munoz (improve list conversion) [contributor] \item Markus Wamser (fixed some typos) [contributor] \item Pierre Formont (additional features) [contributor] \item Kent Russel (documentation) [contributor] \item Noam Ross (fixes) [contributor] \item Duncan Garmonsway (fixes) [contributor] } } \keyword{internal} ================================================ FILE: man/isLeaf.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_actives.R \name{isLeaf} \alias{isLeaf} \title{Check if a \code{Node} is a leaf} \usage{ isLeaf(node) } \arguments{ \item{node}{The Node to test.} } \value{ TRUE if the Node is a leaf, FALSE otherwise } \description{ Check if a \code{Node} is a leaf } ================================================ FILE: man/isNotLeaf.Rd ================================================ % 
Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_actives.R \name{isNotLeaf} \alias{isNotLeaf} \title{Check if a \code{Node} is not a leaf} \usage{ isNotLeaf(node) } \arguments{ \item{node}{The Node to test.} } \value{ FALSE if the Node is a leaf, TRUE otherwise } \description{ Check if a \code{Node} is not a leaf } ================================================ FILE: man/isNotRoot.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_actives.R \name{isNotRoot} \alias{isNotRoot} \title{Check if a \code{Node} is not a root} \usage{ isNotRoot(node) } \arguments{ \item{node}{The Node to test.} } \value{ FALSE if the Node is the root, TRUE otherwise } \description{ Check if a \code{Node} is not a root } ================================================ FILE: man/isRoot.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_actives.R \name{isRoot} \alias{isRoot} \title{Check if a \code{Node} is the root} \usage{ isRoot(node) } \arguments{ \item{node}{The Node to test.} } \value{ TRUE if the Node is the root, FALSE otherwise } \description{ Check if a \code{Node} is the root } ================================================ FILE: man/mushroom.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_doc.R \docType{data} \name{mushroom} \alias{mushroom} \title{Sample Data: Data Used by the ID3 Vignette} \format{ data.frame } \usage{ data(mushroom) } \description{ mushroom contains attributes of mushrooms. We can use this data to predict a mushroom's toxicity based on its attributes. 
The attributes available in the data set are: } \details{ \itemize{ \item color the color of a mushroom \item size whether a mushroom is small or large \item points whether a mushroom has points \item edibility whether a mushroom is edible or toxic } } \keyword{datasets} ================================================ FILE: man/print.Node.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/node_methods.R \name{print.Node} \alias{print.Node} \title{Print a \code{Node} in a human-readable fashion.} \usage{ \method{print}{Node}( x, ..., pruneMethod = c("simple", "dist", NULL), limit = 100, pruneFun = NULL, row.names = T ) } \arguments{ \item{x}{The Node} \item{...}{Node attributes to be printed. Can be either a character (i.e. the name of a Node field), a Node method, or a function taking a Node as a single argument. See \code{Get} for details on the meaning of \code{attribute}.} \item{pruneMethod}{The method can be used to prune for printing in a simple way. If NULL, the entire tree is displayed. If "simple", then only the first \code{limit} nodes are displayed. If "dist", then Nodes are removed everywhere in the tree, according to their level. If pruneFun is provided, then pruneMethod is ignored.} \item{limit}{The maximum number of nodes to print. Can be \code{NULL} if the entire tree should be printed.} \item{pruneFun}{allows providing a prune criteria, i.e. a function taking a \code{Node} as an input, and returning \code{TRUE} or \code{FALSE}. If the pruneFun returns FALSE for a Node, then the Node and its entire sub-tree will not be considered.} \item{row.names}{If \code{TRUE} (default), then the row names are printed out. Else, they are not.} } \description{ Print a \code{Node} in a human-readable fashion. 
} \examples{ data(acme) print(acme, "cost", "p") print(acme, "cost", probability = "p") print(acme, expectedCost = function(x) x$cost * x$p) do.call(print, c(acme, acme$attributesAll)) tree <- CreateRegularTree(4, 5) # print entire tree: print(tree, pruneMethod = NULL) # print first 20 nodes: print(tree, pruneMethod = "simple", limit = 20) # print 20 nodes, removing leafs first: print(tree, pruneMethod = "dist", limit = 20) # provide your own pruning function: print(tree, pruneFun = function(node) node$position != 2) } ================================================ FILE: man/s3_register.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/register-s3.R \name{s3_register} \alias{s3_register} \title{Register a method for a suggested dependency} \usage{ s3_register(generic, class, method = NULL) } \arguments{ \item{generic}{Name of the generic in the form `pkg::generic`.} \item{class}{Name of the class} \item{method}{Optionally, the implementation of the method. By default, this will be found by looking for a function called `generic.class` in the package environment. Note that providing `method` can be dangerous if you use devtools. When the namespace of the method is reloaded by `devtools::load_all()`, the function will keep inheriting from the old namespace. This might cause crashes because of dangling `.Call()` pointers.} } \description{ Code copied into data.tree from `vctrs` (authors Wickham H, Henry L, Vaughan D; https://github.com/r-lib/vctrs) } \details{ Generally, the recommended way to register an S3 method is to use the `S3Method()` namespace directive (often generated automatically by the `@export` roxygen2 tag). However, this technique requires that the generic be in an imported package, and sometimes you want to suggest a package, and only provide a method when that package is loaded.
`s3_register()` can be called from your package's `.onLoad()` to dynamically register a method only if the generic's package is loaded. (To avoid taking a dependency on vctrs for this one function, please feel free to copy and paste the function source into your own package.) For R 3.5.0 and later, `s3_register()` is also useful when demonstrating class creation in a vignette, since method lookup no longer always involves the lexical scope. For R 3.6.0 and later, you can achieve a similar effect by using "delayed method registration", i.e. placing the following in your `NAMESPACE` file: ``` if (getRversion() >= "3.6.0") { S3method(package::generic, class) } ``` } \examples{ # A typical use case is to dynamically register tibble/pillar methods # for your class. That way you avoid creating a hard dependency on packages # that are not essential, while still providing finer control over # printing when they are used. .onLoad <- function(...) { s3_register("pillar::pillar_shaft", "vctrs_vctr") s3_register("tibble::type_sum", "vctrs_vctr") } } \keyword{internal} ================================================ FILE: publish-cheat-sheet.md ================================================ Before a release, do the following: 1. Set date in DESCRIPTION 2. Make sure NEWS is up to date 3. make sure tests pass (by running devtools::test()) 4. review documentation, especially Node 5. review vignettes (especially if png need updating). Run devtools::spell_check() 6. Check build by running devtools::check() 7. Commit to git, make sure travis and appveyor pass (in case it fails, you may want to clean cache) 8. make sure we have adequate coverage 9. Make sure devel passes, by running devtools::check_win_devel() 10. Make sure r-oldrel passes (easiest is to run it on local windows) 11. Run devtools::check_rhub() 12. Run rhub::check_for_cran() -> if any of these fail, go back to 6.! 13.
check reverse dependencies by running revdepcheck::revdep_check(, num_workers = 2, timeout = as.difftime(60, units="mins")) (from github if not yet published to CRAN devtools::install_github("r-lib/revdepcheck") 14. update cran-comments.md 15. merge into master and push 16. create release on github in master, tag it as pre-release 17. submit it to cran by calling devtools::release() 18. once accepted by CRAN, remove the pre-release flag on github ================================================ FILE: tests/testthat/test-draw.R ================================================ context("plot") test_that("plot only works if DiagrammeR is installed", { # Given # - an object of class "Node"--"R6" # - in an R session where DiagrammeR is not installed # When # - the user tries to construct a DiagrammeR-based graph or plot # Then # - an error is thrown skip_if_not_installed("mockery") data(acme) mockery::stub(ToDiagrammeRGraph, "requireNamespace", FALSE, 1) mockery::stub(plot.Node, "requireNamespace", FALSE, 1) expect_error( ToDiagrammeRGraph(acme), info = "ToDiagrammeRGraph should fail if DiagrammeR is not installed") expect_error( plot(acme), info = "plot() should fail if DiagrammeR is not installed") }) test_that("grViz", { testthat::skip_if_not_installed("DiagrammeR", minimum_version = "1.0.0") data(acme) SetGraphStyle(acme, rankdir = "TB") SetEdgeStyle(acme, arrowhead = "vee", color = "grey35", penwidth = 2) #per default, Node style attributes will be inherited: SetNodeStyle(acme, style = "filled,rounded", shape = "box", fillcolor = "GreenYellow", fontname = "helvetica", tooltip = GetDefaultTooltip) SetNodeStyle(acme$IT, fillcolor = "LightBlue", penwidth = "5px") #inheritance can be avoided: SetNodeStyle(acme$Accounting, inherit = FALSE, fillcolor = "Thistle", fontcolor = "Firebrick", tooltip = "This is the accounting department") #use Do to set style on specific nodes: Do(acme$leaves, function(node) SetNodeStyle(node, shape = "egg")) graph <- ToDiagrammeRGraph(acme, 
direction = "descend", pruneFun = function(x) x$level < 3)
  gv <- DiagrammeR::generate_dot(graph)
  expect_equal(substr(gv, 1, 9), "digraph {")
})

# A single style attribute (label) set on one node must show up in the
# generated DOT text.
test_that("grViz single attribute", {
  testthat::skip_if_not_installed("DiagrammeR", minimum_version = "1.0.0")
  data(acme)
  SetNodeStyle(acme$Accounting, label = "Mimi")
  graph <- ToDiagrammeRGraph(acme)
  gv <- DiagrammeR::generate_dot(graph)
  exp <- "digraph { '1' [label = 'Acme Inc.'] '2' [label = 'Mimi'] '3' [label = 'New Software'] '4' [label = 'New Accounting Standards'] '5' [label = 'Research'] '6' [label = 'New Product Line'] '7' [label = 'New Labs'] '8' [label = 'IT'] '9' [label = 'Outsource'] '10' [label = 'Go agile'] '11' [label = 'Switch to R'] '1'->'2' '1'->'5' '1'->'8' '2'->'3' '2'->'4' '5'->'6' '5'->'7' '8'->'9' '8'->'10' '8'->'11' }"
  expect_equal(gv, exp)
})

# Same check on a tree whose node names are not unique (names are replaced
# by the sibling position).
test_that("grViz single attribute names not uniuqe", {
  testthat::skip_if_not_installed("DiagrammeR", minimum_version = "1.0.0")
  mytree <- CreateRegularTree(3, 3)
  mytree$Do(function(x) x$name <- x$position)
  SetNodeStyle(mytree, label = "Root")
  SetNodeStyle(mytree$`1`, tooltip = "L1")
  graph <- ToDiagrammeRGraph(mytree)
  gv <- DiagrammeR::generate_dot(graph)
  exp <- "digraph { '1' [label = 'Root', tooltip = ''] '2' [label = 'Root', tooltip = 'L1'] '3' [label = 'Root', tooltip = ''] '4' [label = 'Root', tooltip = ''] '5' [label = 'Root', tooltip = ''] '6' [label = 'Root', tooltip = ''] '7' [label = 'Root', tooltip = ''] '8' [label = 'Root', tooltip = ''] '9' [label = 'Root', tooltip = ''] '10' [label = 'Root', tooltip = ''] '11' [label = 'Root', tooltip = ''] '12' [label = 'Root', tooltip = ''] '13' [label = 'Root', tooltip = ''] '1'->'2' '1'->'6' '1'->'10' '2'->'3' '2'->'4' '2'->'5' '6'->'7' '6'->'8' '6'->'9' '10'->'11' '10'->'12' '10'->'13' }"
  expect_equal(gv, exp)
})

# Node names containing double quotes must come out escaped in the graph's
# node labels.
test_that("grViz names with quotes", {
  # ToDiagrammeRGraph() is called below, so guard on DiagrammeR exactly like
  # the other grViz tests; without the guard this test errors instead of
  # being skipped when the package is not installed.
  testthat::skip_if_not_installed("DiagrammeR", minimum_version = "1.0.0")
  mytree <- Node$new("my_root")
  mytree$AddChild("A")$AddChild("\"B\"")$AddChild("\"C\"")$AddChild("D")
  exp_lab <- c("my_root", "A", "\\\"B\\\"", "\\\"C\\\"", "D")
expect_equal(ToDiagrammeRGraph(mytree)$nodes_df$label, exp_lab)
})

================================================
FILE: tests/testthat/test-treeConstruction.R
================================================
#library(data.tree)
context("tree construction")

data(acme)

# Only the root reports isRoot.
test_that("isRoot", {
  data(acme)
  expect_equal(acme$isRoot, TRUE)
  expect_equal(acme$IT$isRoot, FALSE)
  expect_equal(acme$IT$`Go agile`$isRoot, FALSE)
})

# count is the number of direct children.
test_that("count", {
  expect_equal(acme$count, 3)
})

# totalCount is the number of nodes in the whole tree.
test_that("totalCount", {
  expect_equal(acme$totalCount, 11)
})

# Climb follows a path of names and returns NULL if the path does not exist.
test_that("Climb", {
  node <- Climb(acme, "Accounting", "New Accounting Standards")
  expect_equal(node$name, "New Accounting Standards")

  node <- Climb(acme, "Not existing node")
  expect_null(node)
})

test_that("isLeaf", {
  node <- Climb(acme, "Accounting", "New Accounting Standards")
  expect_equal(node$isLeaf, TRUE)
})

# The original body only asserted isLeaf, so "level" was never exercised.
# The isLeaf assertions are kept; the level assertions are added (the root
# is at level 1, each generation below adds one).
test_that("level", {
  expect_equal(acme$isLeaf, FALSE)
  expect_equal(acme$level, 1)

  accounting <- Climb(acme, "Accounting")
  expect_equal(accounting$isLeaf, FALSE)
  expect_equal(accounting$level, 2)

  node <- accounting$Climb("New Accounting Standards")
  expect_equal(node$isLeaf, TRUE)
  expect_equal(node$level, 3)
})

================================================
FILE: tests/testthat/test-treeConversionApe.R
================================================
context("tree conversion ape")

# Unnamed inner nodes get their phylo node number as name; underscores in
# tip labels are kept because replaceUnderscore is switched off.
test_that("as.Node.phylo owls", {
  skip_if_not_installed("ape")
  txt <- "owls(((Strix_aluco:4.2,Asio_otus:4.2):3.1,Athene_noctua:7.3):6.3,Tyto_alba:13.5);"
  p <- ape::read.tree(text = txt)
  # FALSE instead of F: F is an ordinary variable that can be reassigned
  n <- as.Node(p, replaceUnderscore = FALSE)
  expect_equal(n$totalCount, 7)
  expect_equal(as.vector(n$Get("name")),
               c("5", "6", "7", "Strix_aluco", "Asio_otus", "Athene_noctua", "Tyto_alba"))
  expect_equal(as.vector(n$Get("level")), c(1, 2, 3, 4, 4, 3, 2))
})

test_that("as.Node.phylo height", {
  skip_if_not_installed("ape")
  txt <- "(A:5,B:5,(C:10,D:10)E:5):0;"
  p <- ape::read.tree(text = txt)
  n <- as.Node(p)
  expect_equal(n$totalCount, 6)
  expect_equal(as.vector(n$Get("name")), c("", "A", "B", "E", "C", "D"))
  expect_equal(as.vector(n$Get("level")), c(1, 2, 2, 2, 3,
3)) expect_equal(as.vector(n$Get("plotHeight")), c(15, 10, 10, 10, 0, 0)) }) test_that("as.Node.phylo no height", { skip_if_not_installed("ape") txt <- "(A,B,(C,D)E)F;" p <- ape::read.tree(text = txt) n <- as.Node(p) expect_equal(n$totalCount, 6) expect_equal(as.vector(n$Get("name")), c("F", "A", "B", "E", "C", "D")) expect_equal(as.vector(n$Get("level")), c(1, 2, 2, 2, 3, 3)) expect_true(all(is.na(n$Get("edgeLength")))) }) test_that("as.Node.phylo height non standard", { skip_if_not_installed("ape") txt <- "(A:5,B:5,(C:10,D:10):5):0;" p <- ape::read.tree(text = txt) n <- as.Node(p, heightName = "edge") expect_equal(n$totalCount, 6) expect_equal(as.vector(n$Get("name")), c("5", "A", "B", "6", "C", "D")) expect_equal(as.vector(n$Get("level")), c(1, 2, 2, 2, 3, 3)) expect_equal(as.vector(n$Get("edge")), c(15, 10, 10, 10, 0, 0)) }) test_that("as.phylo.Node heightAttributeName", { skip_if_not_installed("ape") data(acme) #needs explicit generics as library ape is not loaded p <- as.phylo.Node(acme) n <- as.Node(p) expect_equal(n$Get("name"), acme$Get("name")) }) test_that("as.phylo.Node heightAttributeName", { skip_if_not_installed("ape") data(acme) height <- function(x) x$edgeHeight <- DefaultPlotHeight(x) + 1 acme$Do(height) #needs explicit generics as library ape is not loaded p <- as.phylo.Node(acme, heightAttributeName = "edgeHeight") n <- as.Node(p) expect_equal(n$Get("name"), acme$Get("name")) gh <- function(x) { if (x$isRoot) { x$edgeHeight <- 0 return() } if (x$parent$isRoot) ph <- 0 else ph <- x$parent$edgeLength x$edgeHeight <- x$edgeLength - ph } n$Do(gh) expect_equal(n$Get("edgeLength"), acme$Get("edgeLength")) }) test_that("GetPhyloNumber node", { data(acme) acme$Do(function(x) x$phyloNr <- GetPhyloNr(x, "node")) expect_equal(as.vector(acme$Get("phyloNr")), c(8,9,1,2,10,3,4,11,5,6,7)) }) test_that("GetPhyloNumber edge", { data(acme) acme$Do(function(x) x$phyloNr <- GetPhyloNr(x, "edge"), filterFun = isNotRoot) expect_equal(as.vector(acme$Get("phyloNr")), 
c(NA, 1:10))
})

================================================
FILE: tests/testthat/test-treeConversionDataFrame.R
================================================
context("tree conversion data.frame")

data(acme)

# Round trip Node -> data.frame -> Node must reproduce the tree.
test_that("as.Node.data.frame", {
  data(acme)
  acmedf <- as.data.frame(acme,
                          row.names = NULL,
                          optional = FALSE,
                          'p', 'cost', 'pathString')
  acme2 <- as.Node(acmedf, na.rm = TRUE)
  expect_equal(as.list(acme), as.list(acme2))
  # with na.rm = TRUE, NA cells must not be turned into attributes
  expect_null(acme2$children[[1]]$p)
  expect_equal(as.data.frame(acme, row.names = NULL, optional = FALSE, 'p', 'cost'),
               as.data.frame(acme2, row.names = NULL, optional = FALSE, 'p', 'cost'))

  #test that if they are not different it fails
  # acc2 <- acme2$Climb("Accounting")
  # acc2$newField <- 'new value'
  # expect_equal(as.list(acme), as.list(acme2))
})

# A data.frame holding nothing but the pathString column is enough to
# build the tree.
test_that("FromDataFrameTable no extra column", {
  pathString <- c("a/b/c/d", "a/b/c/e", "a/f")
  df <- data.frame(pathString)
  tree <- FromDataFrameTable(df)
  expect_equal(Get(tree$leaves, "name"), c(d = "d", e = "e", f = "f"))
})

# Node names that collide with reserved words get a "2" suffix; a warning
# is raised unless check = "no-warn".
test_that("FromDataFrameTable reserved words", {
  pathString <- c("a/b/c/d", "a/b/c/e", "a/f")
  value <- c("d", "e", "f")
  df <- data.frame(pathString, value, stringsAsFactors = FALSE)

  #no warn
  expect_warning(tree <- FromDataFrameTable(df, na.rm = TRUE), NA)
  expect_equal(Get(tree$leaves, "value"), c(d = "d", e = "e", f = "f"))

  expect_warning(tree <- FromDataFrameTable(df, na.rm = TRUE, check = "no-warn"), NA)
  expect_equal(Get(tree$leaves, "value"), c(d = "d", e = "e", f = "f"))

  #reserved words
  pathString <- c("name/path/height/count", "name/path/height/e", "name/leaves")
  value <- c("d", "e", "f")
  df <- data.frame(pathString, value, stringsAsFactors = FALSE)
  # expect_warning() replaces the deprecated expect_that(x, gives_warning())
  expect_warning(tree <- FromDataFrameTable(df, na.rm = TRUE))
  expect_equal(Get(tree$leaves, "value"), c(count2 = "d", e = "e", leaves2 = "f"))

  df <- data.frame(pathString, value, stringsAsFactors = FALSE)
  expect_warning(tree <- FromDataFrameTable(df, na.rm = TRUE, check = "no-warn"), NA)
expect_equal(Get(tree$leaves, "value"), c(count2 = "d", e = "e", leaves2 = "f"))
})

# An attribute column whose name collides with a reserved Node name ("name")
# must raise a warning, unless check = "no-warn" is requested.
test_that("FromDataFrameNetwork reserved words", {
  parent <- c("a", "a", "b", "c", "c")
  child <- c("b", "f", "c", "d", "e")
  value <- c(0:4)
  network_df <- data.frame(parent, child, value, stringsAsFactors = FALSE)

  #no warn
  expect_warning(tree <- FromDataFrameNetwork(network_df), regexp = NA)

  #reserved words
  parent <- c("a", "a", "b", "c", "c")
  child <- c("b", "f", "c", "d", "e")
  name <- c(0:4)
  network_df <- data.frame(parent, child, name, stringsAsFactors = FALSE)
  # expect_warning() replaces the deprecated expect_that(x, gives_warning())
  expect_warning(tree <- FromDataFrameNetwork(network_df))
  expect_warning(tree <- FromDataFrameNetwork(network_df, check = "no-warn"), NA)
})

# Columns can be requested by attribute name, by function, or supplied as a
# pre-computed vector.
test_that("as.data.frame.Node", {
  data(acme)
  acmedf <- as.data.frame(acme,
                          row.names = NULL,
                          optional = FALSE,
                          myp = 'p',
                          'cost',
                          pstr = function(x) x$pathString,
                          sg = acme$Get(function(x) x$p))

  expect_equal(names(acmedf), c("levelName", "myp", "cost", "pstr", "sg"))
  expect_equal(acmedf[2, 4], "Acme Inc./Accounting")
  # The original compared acmedf$sg with itself, which can never fail.
  # "sg" is the pre-computed vector of p values, so it has to match the
  # "myp" column that is collected by attribute name.
  # NOTE(review): assumes both columns are stored the same way - confirm
  # by running the suite.
  expect_equal(acmedf$sg, acmedf$myp)
})

# List-valued attributes are collapsed into one comma separated string
# per node.
test_that("as.data.frame.Node list attributes", {
  data(acme)
  acme$Set(data = list(list(list(a = 1, b = "a"))), filterFun = isLeaf)
  acme$Set(data = list(list(list(b = "c"))),
           filterFun = function(n) isNotLeaf(n) && isNotRoot(n))
  expect_identical(as.data.frame(acme, data = "data")$data,
                   c(NA, "c", "1, a", "1, a", "c", "1, a", "1, a", "c", "1, a", "1, a", "1, a"))
})

# ToDataFrameTable yields one row per leaf.
test_that("ToDataFrameTable", {
  data(acme)
  acme$myfield <- "yes"
  acmedf <- ToDataFrameTable(acme,
                             myp = "p",
                             "cost",
                             "myfield",
                             pstr = function(x) x$pathString)

  expect_equal(names(acmedf), c("myp", "cost", "myfield", "pstr"))
  expect_equal(acmedf[2, 4], "Acme Inc./Accounting/New Accounting Standards")
  expect_equal(nrow(acmedf), acme$leafCount)
  expect_true(all(acmedf$myfield == "yes"))
})

test_that("ToDataFrameNetwork climb", {
  data(acme)
  acmedf <- ToDataFrameNetwork(acme, "p", direction = "climb")
  expect_equal(names(acmedf), c("from", "to", "p"))
expect_equal(acmedf$to, c("Accounting", "Research", "IT", "New Software", "New Accounting Standards", "New Product Line", "New Labs", "Outsource", "Go agile", "Switch to R")) expect_equal(acmedf$from, c("Acme Inc.", "Acme Inc.", "Acme Inc.", "Accounting", "Accounting", "Research", "Research", "IT", "IT", "IT")) }) test_that("ToDataFrameNetwork descend", { data(acme) acmedf <- ToDataFrameNetwork(acme, "p", direction = "descend") expect_equal(names(acmedf), c("from", "to", "p")) expect_equal(acmedf$from, c("Accounting", "Research", "IT", "New Software", "New Accounting Standards", "New Product Line", "New Labs", "Outsource", "Go agile", "Switch to R")) expect_equal(acmedf$to, c("Acme Inc.", "Acme Inc.", "Acme Inc.", "Accounting", "Accounting", "Research", "Research", "IT", "IT", "IT")) }) test_that("ToDataFrame sub-tree", { data(acme) it <- acme$Climb("IT") df <- ToDataFrameTree(it) expect_equal(dim(df), c(4, 1)) expect_equal(stri_sub(df[1, 1], 1, 2), 'IT') }) test_that("ToDataFrameTypeCol level", { data(acme) acme$IT$Outsource$AddChild("India") acme$IT$Outsource$AddChild("Poland") acmedf <- ToDataFrameTypeCol(acme) expect_equal(names(acmedf), c('level_1', 'level_2', 'level_3', 'level_4')) expect_true( all(acmedf$level_1 == 'Acme Inc.')) expect_equal(acmedf$level_2, c('Accounting', 'Accounting', 'Research', 'Research', 'IT', 'IT', 'IT', 'IT')) expect_equal(acmedf$level_3, c('New Software', 'New Accounting Standards', 'New Product Line', 'New Labs', 'Outsource', 'Outsource', 'Go agile', 'Switch to R')) expect_equal(acmedf$level_4, c(NA, NA, NA, NA, 'India', 'Poland', NA, NA)) }) test_that("ToDataFrameTypeCol type", { data(acme) acme$IT$Outsource$AddChild("India") acme$IT$Outsource$AddChild("Poland") acme$Set(type = c('company', 'department', 'project', 'project', 'department', 'project', 'project', 'department', 'program', 'project', 'project', 'project', 'project')) acmedf <- ToDataFrameTypeCol(acme, type = 'type', prefix = NULL) expect_equal(names(acmedf), 
c('company', 'department', 'program', 'project')) expect_true( all(acmedf$company == 'Acme Inc.')) expect_equal(acmedf$department, c('Accounting', 'Accounting', 'Research', 'Research', 'IT', 'IT', 'IT', 'IT')) expect_equal(acmedf$program, c(NA, NA, NA, NA, 'Outsource', 'Outsource', NA, NA)) expect_equal(acmedf$project, c('New Software', 'New Accounting Standards', 'New Product Line', 'New Labs', 'India', 'Poland', 'Go agile', 'Switch to R')) }) test_that("FromDataFrameTable col-levels", { data(acme) acme$Set(floor = c(1, 2, 3), filterFun = function(x) x$level == 2) x <- ToDataFrameTable(acme, "pathString", "floor", "p", "cost") xN <- FromDataFrameTable(x, colLevels = list(NULL, "floor", c("p", "cost")), na.rm = TRUE) expect_equal(xN$Climb("Accounting")$floor, 1) expect_true(is.null(xN$Climb("Accounting", "New Accounting Standards")$floor)) expect_true(is.null(xN$floor)) expect_equal(xN$Climb("Accounting", "New Accounting Standards")$p, 0.75) }) test_that("FromDataFrameNetwork descend", { data(acme) x <- ToDataFrameNetwork(acme, "p", "cost", direction = "descend") xN <- FromDataFrameNetwork(x) expect_equal(xN$totalCount, acme$totalCount) expect_equal(xN$Get("name"), acme$Get("name")) expect_equal(xN$Get("p"), acme$Get("p")) expect_equal(xN$height, acme$height) expect_equal(xN$Get("level"), acme$Get("level")) expect_equal(xN$Get(function(x) x$parent$name), acme$Get(function(x) x$parent$name)) expect_equal(xN$Get("isLeaf"), acme$Get("isLeaf")) }) test_that("FromDataFrameNetwork climb", { data(acme) x <- ToDataFrameNetwork(acme, "p", "cost", direction = "climb") xN <- FromDataFrameNetwork(x) expect_equal(xN$totalCount, acme$totalCount) expect_equal(xN$Get("name"), acme$Get("name")) expect_equal(xN$Get("p"), acme$Get("p")) expect_equal(xN$height, acme$height) expect_equal(xN$Get("level"), acme$Get("level")) expect_equal(xN$Get(function(x) x$parent$name), acme$Get(function(x) x$parent$name)) expect_equal(xN$Get("isLeaf"), acme$Get("isLeaf")) }) 
test_that("FromDataFrameNetwork order", { data(acme) x <- ToDataFrameNetwork(acme, "p", "cost") odr <- c(4, 1, 6, 8, 9, 10, 2, 7, 5, 3) x <- x[odr, ] xN <- FromDataFrameNetwork(x) expect_equal(xN$Get("name"), acme$Get("name")) x <- x[, c('to', 'from', 'p', 'cost')] xN <- FromDataFrameNetwork(x) expect_equal(xN$Get("name"), acme$Get("name")) }) ================================================ FILE: tests/testthat/test-treeConversionDendrogram.R ================================================ context("tree conversion dendrogram") test_that("as.Node.dendrogram", { hc <- hclust(dist(USArrests), "ave") dend1 <- as.dendrogram(hc) root <- as.Node(dend1) expect_equal(root$totalCount, 99) expect_true(root$isBinary) }) test_that("as.dendrogram.Node", { data(acme) acmed <- as.dendrogram(acme) expect_equal(class(acmed), "dendrogram") expect_equal(nobs(acmed), acme$leafCount) expect_equal(attr(acmed, "height"), 100) }) ================================================ FILE: tests/testthat/test-treeConversionList.R ================================================ context("tree conversion") data(acme) test_that("as.list.Node explicit", { data(acme) l <- as.list(acme, mode = "explicit") expect_equal("list", class(l)) expect_equal(2, length(l)) expect_equal(c('name', 'children'), names(l)) expect_equal(c('children'), names(l$children$Research)) expect_equal(0.9, l$children$Research$children$`New Labs`$p) }) test_that("as.list.Node explicit nameName=name", { data(acme) l <- as.list(acme, mode = "explicit", nameName = 'name') expect_equal(class(l), "list") expect_equal(length(l), 2) expect_equal(names(l), c('name', 'children')) expect_equal(names(l$children$Research), c('name', 'children')) expect_equal(l$children$Research$children$`New Labs`$p, 0.9) }) test_that("as.list.Node explicit nameName=id", { data(acme) l <- as.list(acme, mode = "explicit", nameName = 'id') expect_equal(class(l), "list") expect_equal(length(l), 2) expect_equal(names(l), c('id', 'children')) 
expect_equal(names(l$children$Research), c('id', 'children')) expect_equal(l$children$Research$children$`New Labs`$p, 0.9) }) test_that("as.list.Node simple", { data(acme) l <- as.list(acme) expect_equal("list", class(l)) expect_equal(length(l), 4) expect_equal(names(l), c("name", "Accounting", "Research", "IT")) expect_equal(names(l$Research), c("New Product Line", "New Labs" )) expect_equal(0.9, l$Research$`New Labs`$p) }) test_that("as.list.Node simple unname no effect", { data(acme) l <- as.list(acme) expect_equal("list", class(l)) expect_equal(length(l), 4) expect_equal(names(l), c("name", "Accounting", "Research", "IT")) expect_equal(names(l$Research), c("New Product Line", "New Labs" )) expect_equal(0.9, l$Research$`New Labs`$p) }) test_that("as.list.Node simple nameName=name", { data(acme) l <- as.list(acme, nameName = 'name') expect_equal("list", class(l)) expect_equal(length(l), 4) expect_equal(names(l), c('name', "Accounting", "Research", "IT")) expect_equal(names(l$Research), c("name", "New Product Line", "New Labs" )) expect_equal(0.9, l$Research$`New Labs`$p) }) test_that("as.list.Node simple keepOnly=p", { data(acme) l <- as.list(acme, keepOnly = 'p') expect_equal("list", class(l)) expect_equal(length(l), 4) expect_equal(names(l), c('name', "Accounting", "Research", "IT")) expect_equal(names(l$Research), c("New Product Line", "New Labs" )) expect_equal(0.9, l$Research$`New Labs`$p) expect_null(l$Research$`New Labs`$cost) }) test_that("as.list.Node explicit nameName=id", { l <- as.list(acme, nameName = 'id') expect_equal("list", class(l)) expect_equal(length(l), 4) expect_equal(names(l), c('id', "Accounting", "Research", "IT")) expect_equal(names(l$Research), c("id", "New Product Line", "New Labs" )) expect_equal(0.9, l$Research$`New Labs`$p) }) test_that("as.Node.list", { data(acme) n <- as.Node(as.list(acme)) expect_equal("Acme Inc.", n$name) expect_equal(3, n$count) expect_equal(11, n$totalCount) expect_equal(0.05, n$Climb("IT", "Go agile")$p) }) 
test_that("as.list.Node unname", { data(acme) l <- as.list(acme, mode = "explicit", unname = TRUE, nameName = 'id', childrenName = 'sub') expect_equal("list", class(l)) expect_equal(2, length(l)) expect_equal(c('id', 'sub'), names(l)) expect_equal(0.9, l$sub[[2]]$sub[[2]]$p) expect_equal('New Product Line', l$sub[[2]]$sub[[1]]$id) }) test_that("as.Node.list unname with mode = simple", { l <- as.list(acme, unname = TRUE, nameName = 'id', childrenName = 'sub') n <- as.Node(l, nameName = 'id', childrenName = 'sub') expect_equal("Acme Inc.", n$name) expect_equal(3, n$count) expect_equal(11, n$totalCount) expect_equal(0.05, n$Climb("IT", "Go agile")$p) }) test_that("as.Node.list auto", { lol <- list(type = "Root", list(type = "Rule", value = 1), list(type = "Rule", value = 2)) tree <- FromListSimple(lol, nameName = NULL, nodeName = 1) expect_equal(tree$totalCount, 3) expect_equal(unname(tree$Get("name")), as.character(c(1, 1, 2))) expect_equal(tree$children[[1]]$type, "Rule") }) test_that("as.Node.list warning", { lol <- list(type = "Root", list(type = "Rule", count = 1), list(type = "Rule", count = 2)) #tree <- FromListSimple(lol, nameName = NULL, nodeName = 1) tree <- NULL expect_warning(FromListSimple(lol, nameName = NULL, nodeName = 1, check = "no-warn"), NA) expect_that(tree <- FromListSimple(lol, nameName = NULL, nodeName = 1), gives_warning()) expect_equal(tree$totalCount, 3) expect_equal(unname(tree$Get("name")), as.character(c(1, 1, 2))) expect_equal(unname(tree$Get("count")), c(2,0,0)) expect_equal(unname(tree$Get("count2")), c(NA, 1, 2)) expect_warning(FromListSimple(lol, nameName = NULL, nodeName = 1, check = "no-check"), NA) expect_that(tree <- FromListSimple(lol, nameName = NULL, nodeName = 1), gives_warning()) }) test_that("as.Node.list string", { yaml <- " children: CR: description: Currencies type: Currency children: CR_CHF: market CR_EUR: market CR_USD: market " lol <- yaml::yaml.load(yaml) tree <- FromListExplicit(lol) expect_equal(tree$totalCount, 5) 
expect_equal(tree$height, 3) expect_equal(tree$CR$CR_CHF$name, "CR_CHF") #market is numbered (1) instead of lost: expect_equal(tree$attributesAll, c("description", "type", "1")) }) test_that("as.Node.list string 2", { yaml <- " children: CR: description: Currencies type: Currency children: - CR_CHF - CR_EUR - CR_USD " lol <- yaml::yaml.load(yaml) tree <- FromListExplicit(lol) expect_equal(tree$totalCount, 5) expect_equal(tree$height, 3) expect_equal(tree$CR$CR_CHF$name, "CR_CHF") }) test_that("as.Node.list string NULL", { yaml <- " name: OS Students 2014/15 OS X: Yosemite: Leopard: Linux: Debian: version: 9 Ubuntu: Windows: W7: W8: W10: " lol <- yaml::yaml.load(yaml) tree <- FromListSimple(lol, interpretNullAsList = TRUE) expect_equal(tree$totalCount, 11) expect_equal(tree$height, 3) expect_equal(tree$Linux$Debian$name, "Debian") expect_equal(tree$Linux$Debian$version, 9) }) test_that("as.Node.list empty list()", { lol = list(a = list(aa=1), b="hello", c = list()) tree = FromListSimple(lol) expect_equal(tree$totalCount, 3) expect_equal(tree$height, 2) expect_equal(tree$c$count, 0) expect_equal(tree$c$attributes, character(0)) }) test_that("as.Node.list with empty attributes", { lol = list(a = list(aa=1), "hello", c = list(), 1, d=2, list(e=4)) tree = FromListSimple(lol) expect_equal(length(tree$attributes), 3) expect_equal(tree$count, 3) expect_true(all(c(1, 2) %in% tree$attributes)) }) test_that("as.list.Node prune", { data(acme) l <- ToListExplicit(acme, pruneFun = function(node) node$name != 'Research') expect_equal("list", class(l)) expect_equal(2, length(l$children)) expect_equal(c('Accounting', 'IT'), names(l$children)) }) test_that("as.list.Node prune", { data(acme) l <- as.list(acme, pruneFun = function(node) node$name != 'Outsource') expect_equal("list", class(l)) expect_equal(c('Go agile', 'Switch to R'), names(l$IT)) }) ================================================ FILE: tests/testthat/test-treeConversionParty.R 
================================================
context("tree conversion party")

# Conversion of a party::ctree (BinaryTree) into a Node tree.
test_that("party on", {
  skip_if_not_installed("party")
  airq <- subset(airquality, !is.na(Ozone))
  airct <- party::ctree(Ozone ~ ., data = airq,
                        controls = party::ctree_control(maxsurrogate = 3))
  tree <- as.Node(airct)

  res <- as.numeric(unname(tree$Get("name")))
  expect_equal(res, 1:9)

  res <- tree$Get("label")
  exp <- c(`1` = "Temp <= 82",
           `2` = "Wind <= 6.9",
           `3` = "weights = 10",
           `4` = "Temp > 77",
           `5` = "weights = 48",
           `6` = "weights = 21",
           `7` = "Wind > 10.3",
           `8` = "weights = 30",
           `9` = "weights = 7")
  expect_equal(res, exp)
})

# Conversion of a partykit::ctree into a Node tree.
test_that("partykid", {
  # This test only uses partykit, so that is the package to guard on. The
  # previous guard on "party" let the test error out when party was
  # installed but partykit was not.
  skip_if_not_installed("partykit")
  #hack but needed, otherwise extree_data cannot be found
  library(partykit)
  airq <- subset(airquality, !is.na(Ozone))
  airct <- partykit::ctree(Ozone ~ ., data = airq)
  tree <- as.Node(airct)

  res <- as.numeric(unname(tree$Get("name")))
  expect_equal(res, 1:9)

  res <- tree$Get("splitname")
  exp <- c(`1` = "Temp",
           `2` = "Wind",
           `3` = NA,
           `4` = "Temp",
           `5` = NA,
           `6` = NA,
           `7` = "Wind",
           `8` = NA,
           `9` = NA)
  expect_equal(res, exp)

  res <- tree$Get("splitLevel")
  exp <- c(`1` = NA,
           `2` = "<= 82",
           `3` = "<= 6.9",
           `4` = "> 6.9",
           `5` = "<= 77",
           `6` = "> 77",
           `7` = "> 82",
           `8` = "<= 10.3",
           `9` = "> 10.3")
  expect_equal(res, exp)
})

================================================
FILE: tests/testthat/test-treeConversionRpart.R
================================================
context("tree conversion rpart")

# The converted tree must mirror the rpart frame: one Node per frame row,
# binary splits, and one leaf per terminal row.
test_that("Conversion from rpart", {
  skip_if_not_installed("rpart")
  fit <- rpart::rpart(Kyphosis ~ Age + Number + Start, data = rpart::kyphosis)
  tree <- as.Node(fit)
  expect_equal(tree$totalCount, NROW(fit$frame))
  expect_true(tree$isBinary)
  expect_equal(tree$leafCount, sum(fit$frame$var == "<leaf>"))
  expect_true(all(tree$Get("name", filterFun = isNotLeaf) %in% labels(fit)))
  expect_equivalent(tree$Get("rpart.id"), as.numeric(rownames(fit$frame)))
})

================================================
FILE: 
tests/testthat/test-treeConversionigraph.R
================================================
context("tree conversion igraph")

# A tree with n nodes converts to a graph with n - 1 edges.
test_that("as.Node.igraph undirected", {
  skip_if_not_installed("igraph")
  data(acme)
  ig <- as.igraph.Node(acme, "p", c("level", "isLeaf"), directed = FALSE)
  #expect_true(is_hierarchical(ig))
  expect_false(igraph::is_directed(ig))
  expect_equal(igraph::gsize(ig), acme$totalCount - 1)
})

test_that("as.Node.igraph directed", {
  skip_if_not_installed("igraph")
  data(acme)
  ig <- as.igraph.Node(acme, "p", c("level", "isLeaf"), directed = TRUE)
  #expect_true(is_hierarchical(ig))
  expect_true(igraph::is_directed(ig))
  expect_equal(igraph::gsize(ig), acme$totalCount - 1)
})

================================================
FILE: tests/testthat/test-treeDocu.R
================================================
context("tree docu method")

================================================
FILE: tests/testthat/test-treeMethods.R
================================================
context("tree methods")

# Node$new(): a reserved name such as "name" is renamed to "name2" with a
# warning; "no-warn" renames silently; "no-check" / FALSE keep the name.
test_that("Node instantiation", {
  expect_equal(
    Node$new()$name, "", info="Confirm default name is an empty string"
  )

  n <- Node$new("bla")
  expect_equal(n$name, "bla")
  n <- Node$new("bla", check = "check")
  expect_equal(n$name, "bla")
  n <- Node$new("bla", check = "no-check")
  expect_equal(n$name, "bla")
  n <- Node$new("bla", check = "no-warn")
  expect_equal(n$name, "bla")
  n <- Node$new("bla", check = "whatever")
  expect_equal(n$name, "bla")

  # expect_warning() replaces the deprecated expect_that(x, gives_warning())
  expect_warning(n <- Node$new("name"))
  expect_equal(n$name, "name2")
  expect_warning(n <- Node$new("name", check = "check"))
  expect_equal(n$name, "name2")
  expect_warning((n <- Node$new("name", check = "no-check")), regexp = NA)
  expect_equal(n$name, "name")
  expect_warning(n <- Node$new("name", check = FALSE), regexp = NA)
  expect_equal(n$name, "name")
  expect_warning(n <- Node$new("name", check = "no-warn"), regexp = NA)
  expect_equal(n$name, "name2")
  # an unknown check value behaves like "check"
  expect_warning(n <- Node$new("name", check = "whatever"))
  expect_equal(n$name, "name2")

  expect_error(
    Node$new(NA_character_), regexp="Node name must be a non-NA character"
  )
  expect_error(
    Node$new(c("A", "B")), regexp="Node name must be a scalar"
  )
})

# Climbing along a path that does not exist returns NULL at any depth.
test_that("Climb NULL", {
  data(acme)
  expect_null(Climb(acme, 'X'))
  expect_null(Climb(acme, 'X', 'Y', 'Z'))
  expect_null(Climb(acme, 'IT', 'X'))
})

# A multi-step Climb equals chained single-step Climbs.
test_that("Climb Equivalent", {
  data(acme)
  expect_equal(Climb(acme, 'IT', 'Go agile'), Climb(acme, 'IT')$Climb('Go agile'))
})

# The path can be given as separate arguments, as one vector, or as a
# named vector argument.
test_that("Climb 3rd Level", {
  data(acme)
  Climb(acme, 'IT', 'Go agile')$AddChild('MyTest')$AddChild('MyTest2')
  expect_equal("MyTest2", Climb(acme, 'IT', 'Go agile', 'MyTest', 'MyTest2')$name)
  expect_equal("MyTest2", Climb(acme, c('IT', 'Go agile', 'MyTest', 'MyTest2'))$name)
  expect_equal("MyTest2", Climb(acme, name = c('IT', 'Go agile', 'MyTest', 'MyTest2'))$name)
})

# Climb can follow attributes other than the name (here: position).
test_that("Climb non-name", {
  tree <- CreateRegularTree(5, 2)
  p <- tree$Climb(c("1.1", "1.1.1"), position = c(2, 2))$path
  expect_equal(c("1", "1.1", "1.1.1", "1.1.1.2", "1.1.1.2.2"), p)
})

# FindNode returns the matching Node, or NULL when there is none.
test_that("Find", {
  data(acme)
  os <- FindNode(acme, "Outsource")
  expect_equal(os$name, "Outsource")

  os <- FindNode(acme, "XYZ")
  expect_null(os)

  acme$Accounting$AddChild("Outsource")
  os <- FindNode(acme, "Outsource")
  expect_equal(class(os), c("Node", "R6"))
  expect_equal(os$name, "Outsource")
})

# pruneFun drops a node together with its whole sub-tree.
test_that("Get prune", {
  data(acme)
  acme$Set(myvalue = c(1.3, 1.5, 0.9, 1, 2, 1.1, 0.8, -1, 0.7, 1.0, 1.01))

  myFilter <- function(x) {
    return (!is.null(x$myvalue) && x$myvalue > 1)
  }

  get <- acme$Get("myvalue", pruneFun = myFilter)
  #NOTE: 1.01 is filtered out because its parent is -1!
exp <- c(1.3, 1.5, 2, 1.1) names(exp) <- c('Acme Inc.', 'Accounting', 'Research', 'New Product Line') expect_equal(get, exp) }) test_that("Get filter", { data(acme) acme$Set(myvalue = c(1.3, 1.5, 0.9, 1, 2, 1.1, 0.8, -1, 0.7, 1.0, 1.01)) myFilter <- function(x) { return (!is.null(x$myvalue) && x$myvalue > 1) } get <- acme$Get("myvalue", filterFun = myFilter) exp <- c(1.3, 1.5, 2, 1.1, 1.01) names(exp) <- c('Acme Inc.', 'Accounting', 'Research', 'New Product Line', 'Switch to R') expect_equal(get, exp) }) test_that("Get pre-order", { data(acme) get <- acme$Get("name", traversal = "pre-order") exp <- c('Acme Inc.', 'Accounting', 'New Software', 'New Accounting Standards', 'Research', 'New Product Line', 'New Labs', 'IT', 'Outsource', 'Go agile', 'Switch to R' ) names(exp) <- exp expect_equal(get, exp) }) test_that("Get post-order", { data(acme) get <- acme$Get("name", traversal = "post-order") exp <- c('New Software', 'New Accounting Standards', 'Accounting', 'New Product Line', 'New Labs', 'Research', 'Outsource', 'Go agile', 'Switch to R', 'IT', 'Acme Inc.') names(exp) <- exp expect_equal(get, exp) }) test_that("Get ancestor", { data(acme) get <- Climb(acme, 'Research', 'New Labs')$Get("name", traversal = "ancestor") exp <- c('New Labs', 'Research', 'Acme Inc.') names(exp) <- exp expect_equal(get, exp) }) test_that("GetAttribute matrix", { data(acme) acme$IT$matrix <- diag(2) res <- GetAttribute(acme$IT, "matrix") expect_equal(acme$IT$matrix, res) }) test_that("Get format", { data(acme) calculateAggregateChildCost <- function(node, fun) { if (node$isLeaf) node$averageCost <- node$cost else node$averageCost <- fun(sapply(node$children, function(x) x$averageCost)) } myFormat <- function(x) { format(x, nsmall=2, scientific = FALSE) } acme$Do(calculateAggregateChildCost, mean, traversal = "post-order") get <- acme$Get("averageCost", format = myFormat)["New Product Line"] expect_equal(as.character(get), "2000000.00") }) test_that("Traverse pre-order", { data(acme) tr <- 
Traverse(acme, traversal = "pre-order") nms <- sapply(tr, function(x) x$name) exp <- c("Acme Inc.", "Accounting", "New Software", "New Accounting Standards", "Research", "New Product Line", "New Labs", "IT", "Outsource", "Go agile", "Switch to R") expect_equal(nms, exp) }) test_that("Traverse post-order", { data(acme) tr <- Traverse(acme, traversal = "post-order") nms <- sapply(tr, function(x) x$name) exp <- c("New Software", "New Accounting Standards", "Accounting", "New Product Line", "New Labs", "Research", "Outsource", "Go agile", "Switch to R", "IT", "Acme Inc." ) expect_equal(nms, exp) }) test_that("Traverse in-order", { data(acme) tr <- Traverse(acme, traversal = "level") nms <- sapply(tr, function(x) x$name) exp <- c("Acme Inc.", "Accounting", "Research", "IT", "New Software", "New Accounting Standards", "New Product Line", "New Labs", "Outsource", "Go agile", "Switch to R" ) expect_equal(nms, exp) }) test_that("Traverse empty filter", { data(acme) tr <- Traverse(acme, filterFun = function(x) x$name == "Marketing") nms <- sapply(tr, function(x) x$name) exp <- vector(mode = "list") expect_equal(nms, exp) }) test_that("Traverse empty filter level", { data(acme) tr <- Traverse(acme, traversal = "level", filterFun = function(x) x$name == "Marketing") nms <- sapply(tr, function(x) x$name) exp <- vector(mode = "list") expect_equal(nms, exp) }) test_that("Traverse custom method", { CustomTraversalFunction <- function(node) { if (node$isLeaf) return (NULL) return (node$children[[1]]) } data(acme) tr <- Traverse(acme, traversal = CustomTraversalFunction, filterFun = function(x) x$name != "Accounting") nms <- sapply(tr, function(x) x$name) exp <- c("Acme Inc.", "New Software") expect_equal(nms, exp) }) test_that("Traverse custom method multi", { CustomTraversalFunction <- function(node) { if (node$isLeaf) return (NULL) return (node$children) } data(acme) tr <- Traverse(acme, traversal = CustomTraversalFunction) tr2 <- Traverse(acme, traversal = "pre-order") nms <- 
sapply(tr, function(x) x$name) nms2 <- sapply(tr2, function(node) node$name) expect_equal(nms, nms2) }) test_that("Traverse custom method multi prune", { CustomTraversalFunction <- function(node) { if (node$isLeaf) return (NULL) return (node$children) } data(acme) tr <- Traverse(acme, traversal = CustomTraversalFunction, pruneFun = function(node) node$name != "IT") tr2 <- Traverse(acme, traversal = "pre-order", pruneFun = function(node) node$name != "IT") nms <- sapply(tr, function(x) x$name) nms2 <- sapply(tr2, function(node) node$name) expect_equal(nms, nms2) }) test_that("Do", { data(acme) calculateAggregateChildCost <- function(node, fun) { if (node$isLeaf) return(node$cost) fun(sapply(node$children, function(x) x$averageCost)) } myFormat <- function(x) { format(x, nsmall=2, scientific = FALSE) } acme$Do(function(x) x$averageCost <- calculateAggregateChildCost(x, mean), traversal = "post-order") get <- acme$Get('averageCost', traversal = "post-order")["New Product Line"] expect_equal(as.numeric(get), 2000000) }) test_that("post-order", { data(acme) acme$Set(myval = 1:acme$totalCount, traversal = "post-order") expect_equal(acme$myval, 11) expect_equal(Climb(acme, "Research")$myval, 6) }) test_that("level", { data(acme) acme$Set(myval = 1:acme$totalCount, traversal = "level") expect_equal(acme$myval, 1) expect_equal(Climb(acme, "Research")$myval, 3) expect_equal(Climb(acme, "IT", "Go agile")$myval, 10) }) test_that("level subtree", { data(acme) it <- Climb(acme, "IT") it$Set(myval = 1:it$totalCount, traversal = "level") expect_equal(it$myval, 1) expect_equal(it$Climb("Outsource")$myval, 2) expect_equal(it$Climb("Go agile")$myval, 3) expect_equal(it$Climb("Switch to R")$myval, 4) }) test_that("prune", { data(acme) acme$Set(myval = 1:8, pruneFun = function(x) x$name != "Research") expect_equal(acme$myval, 1) expect_true(is.null(Climb(acme, "Research")$myval)) expect_true(is.null(Climb(acme, "Research", "New Labs")$myval)) expect_equal(Climb(acme, "IT", "Go 
agile")$myval, 7) }) test_that("filter", { data(acme) acme$Set(myval = 1:10, filterFun = function(x) x$name != "Research") expect_equal(acme$myval, 1) expect_true(is.null(Climb(acme, "Research")$myval)) expect_equal(Climb(acme, "Research", "New Labs")$myval, 6) expect_equal(Climb(acme, "IT", "Go agile")$myval, 9) }) test_that("isBinary", { node <- Node$new("0") addBinChildren <- function(node, n) { for (i in 1:2) { child <- node$AddChild(paste0(node$name, ".", i)) if (n > 0) addBinChildren(child, n-1) } } addBinChildren(node, 3) expect_true(node$isBinary) }) test_that("in-order", { node <- Node$new("0") addBinChildren <- function(node, n) { for (i in 1:2) { child <- node$AddChild(paste0(node$name, ".", i)) if (n > 0) addBinChildren(child, n-1) } } addBinChildren(node, 2) #make sure the tree is irregular addBinChildren(node$Climb("0.1", "0.1.2", "0.1.2.1"), 0) g <- node$Get("name", traversal = "in-order") expected <- c("0.1.1.1", "0.1.1", "0.1.1.2", "0.1", "0.1.2.1.1", "0.1.2.1", "0.1.2.1.2", "0.1.2", "0.1.2.2", "0", "0.2.1.1", "0.2.1", "0.2.1.2", "0.2", "0.2.2.1", "0.2.2", "0.2.2.2") names(expected) <- expected expect_equal(g, expected) }) test_that("Set recycling", { data(acme) Climb(acme, "Accounting", "New Accounting Standards")$AddChild("ICI 320") acme$Set(myval = 1:6) expect_equal(acme$myval, 1) expect_equal(Climb(acme, "Research", "New Labs")$myval, 2) expect_equal(Climb(acme, "IT", "Go agile")$myval, 5) }) test_that("Aggregate", { data(acme) expect_equal(Aggregate(acme, "cost", sum), 4950000) }) test_that("Clone", { data(acme) n <- Clone(acme) expect_equal(class(n), class(acme)) expect_equal(n$name, acme$name) expect_equal(n$count, acme$count) expect_equal(n$totalCount, acme$totalCount) expect_equal(n$Climb("IT", "Go agile")$p, Climb(acme, "IT", "Go agile")$p) expect_equal(as.list(n), as.list(acme)) acme2 <- acme expect_identical(acme, acme2) #expect_false(n, is_identical_to(acme)) n$name <- 'Acme2' expect_false(n$name == acme$name) }) test_that("Clone 
formatter", { data(acme) SetFormat(acme, "count", FormatFixedDecimal) SetFormat(Climb(acme, "IT", "Outsource"), "p", FormatPercent) n <- Clone(acme, attributes = TRUE) fo <- attr(n, "formatters")[["count"]] expect_equal(fo, FormatFixedDecimal) fo2 <- attr(n$Climb("IT", "Outsource"), "formatters")[["p"]] expect_equal(fo2, FormatPercent) }) test_that("Clone subtree", { data(acme) it <- Climb(acme, "IT") itcl <- Clone(it) expect_equal(class(itcl), class(it)) expect_equal(itcl$name, it$name) expect_equal(itcl$count, it$count) expect_equal(itcl$totalCount, it$totalCount) expect_equal(itcl$Climb("Go agile")$p, it$Climb("Go agile")$p) expect_true(itcl$isRoot) }) test_that("Aggregate", { data(acme) g <- acme$Get(Aggregate, "p", sum) expect_false(is.na(g[1])) expect_equal(3.65, as.vector(g[1])) }) test_that("Aggregate function", { data(acme) g <- acme$Get(Aggregate, function(x) x$p * x$cost, sum) expect_false(is.na(g[1])) expect_equal(g[[1]], sum(acme$Get(function(x) x$cost * x$p, filterFun = isLeaf))) }) test_that("Formatter Get", { data(acme) SetFormat(acme, "p", FormatPercent) p <- acme$Get("p", format = TRUE) expect_equal(p[["Go agile"]], "5.00 %") }) test_that("Formatter Get Hierarchy", { data(acme) SetFormat(acme, "p", FormatPercent) acme$p <- 1 n <- Climb(acme, "IT") SetFormat(n, "p", FormatFixedDecimal) p <- acme$Get("p", format = TRUE) expect_equal(p[["Acme Inc."]], "100.00 %") expect_equal(p[["Outsource"]], "0.200") p <- acme$Get("p", format = FormatFixedDecimal) expect_equal(p[["Acme Inc."]], "1.000") p <- acme$Get("p", format = function(x) x) expect_equal(p[["Acme Inc."]], 1) expect_true(is.numeric(p[["Acme Inc."]])) expect_equal(p[["Outsource"]], 0.2) }) test_that("Set matrix", { data(acme) acme$Set(id = 1:acme$totalCount) ms <- sapply(1:acme$totalCount, function(x) diag(x)) acme$Set(matrix = ms) msget <- acme$Get("matrix") expect_equal(unname(acme$Get("name")), names(msget)) expect_equal(ms, unname(msget)) }) test_that("Set pre-order", { data(acme) 
acme$Set(mycnt = 1:acme$totalCount) expect_equal( Climb(acme, "IT")$mycnt, 8) }) test_that("Set post-order", { data(acme) acme$Set(mycnt = 1:acme$totalCount, traversal = "post-order") expect_equal( Climb(acme, "IT")$mycnt, 10) expect_equal( acme$mycnt, 11) }) test_that("Set filter", { data(acme) acme$Set(mycnt = 1:3, filterFun = function(x) x$level == 2) expect_equal( Climb(acme, "IT")$mycnt, 3) expect_equal( acme$mycnt, NULL) }) test_that("Revert", { data(acme) acme$Set(id = 1:acme$totalCount) Revert(acme) ids <- unname(acme$Get("id")) expected = c(1, 8, 11, 10, 9, 5, 7, 6, 2, 4, 3) expect_equal(ids, expected) }) test_that("attributesAll", { data(acme) fa <- acme$attributesAll expect_equal(fa, c("cost", "p")) acme$Set(tta = 1:acme$totalCount) expect_equal(acme$attributesAll, c("tta", "cost", "p")) }) test_that("height", { data(acme) expect_equal(acme$height, 3) expect_equal(Climb(acme, "IT")$height, 2) Climb(acme, "IT", "Outsource")$AddChild("New") expect_equal(acme$height, 4) }) test_that("isRoot", { data(acme) expect_true(acme$isRoot) expect_false(Climb(acme, "IT")$isRoot) expect_equal(Climb(acme, "IT")$height, 2) isRoot <- acme$Get("isRoot") expect_equal(sum(isRoot), 1) }) test_that("isLeaf", { data(acme) expect_false(acme$isLeaf) expect_true(Climb(acme, "Research", "New Labs")$isLeaf) isLeaf <- acme$Get("isLeaf") leaves <- names(isLeaf)[isLeaf] exp <- c("New Software", "New Accounting Standards", "New Product Line", "New Labs", "Outsource", "Go agile", "Switch to R") expect_equal(leaves, exp) }) test_that("level (active)", { data(acme) expect_equal(acme$level, 1) expect_equal(Climb(acme, "Research")$level, 2) expect_equal(Climb(acme, "Research", "New Labs")$level, 3) }) test_that("set name Climb", { data(acme) rs <- Climb(acme, "Research") rs$name <- "Research2" rs2 <- Climb(acme, "Research") expect_true(is.null(rs2)) rs2 <- Climb(acme, "Research2") expect_true(rs2$name == "Research2") expect_equal(names(rs$parent$children), c("Accounting", "Research2", "IT")) 
}) test_that("change name", { data(acme) # acme$Research$name <- "Research2" # expect_true(is.null(acme$Research)) rs <- acme$Research rs$name <- "Research2" expect_true(is.null(acme$Research)) expect_true(acme$Research2$name == "Research2") }) test_that("attribute function with formatter", { data(acme) SetFormat(acme, "cost", FormatFixedDecimal) acme$IT$cost <- function(self) sum(sapply(self$children, function(x) x$cost)) mycost <- acme$Get("cost", format = TRUE) expect_equal(mycost[[8]], "700000.000") }) test_that("Remove Child", { data(acme) sw <- acme$Accounting$RemoveChild("New Software") expect_equal(sw$name, "New Software") expect_true(sw$isRoot) expect_equal(acme$Accounting$count, 1) expect_equal(names(acme$Accounting$children), c("New Accounting Standards")) }) test_that("Remove Attribute", { data(acme) acme$Research$floor <- 21 expect_true("floor" %in% acme$Research$attributes) acme$Research$RemoveAttribute("floor") expect_false("floor" %in% acme$Research$attributes) }) test_that("Remove Attribute stop", { data(acme) acme$Research$floor <- 21 expect_true("floor" %in% acme$Research$attributes) expect_true(acme$Research$RemoveAttribute("floor", FALSE)) expect_false("floor" %in% acme$Research$attributes) expect_false(acme$IT$RemoveAttribute("floor", FALSE)) }) test_that("Add Sibling", { data(acme) acme$Research$AddSibling("Marketing")$AddChild("Web")$AddSibling("Print") expect_equal(acme$Marketing$position, 3) expect_equal(acme$IT$position, 4) expect_equal(acme$Marketing$Web$siblings[[1]]$name, "Print") }) test_that("print", { data(acme) acme2 <- print(acme, "cost") expect_equal(colnames(acme2), c("levelName", "cost")) }) test_that("print list field", { lol = list(a = list( c = list(1:5), b = 2 )) tree <- FromListSimple(lol) #expect no error expect_error(do.call("print", c(tree, tree$attributesAll)), NA) }) test_that("print list field combo", { aNestedTree = list(a = list( b = 5, a = 2 )) tree <- FromListSimple(aNestedTree) #expect no error 
expect_error(print(tree, "a"), NA) }) test_that("Cumulate", { data(acme) acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum), traversal = "post-order") acme$Do(function(x) x$cumCost <- Cumulate(x, "cost", sum)) expect_equal(unname(acme$Get("cumCost")), c(4950000, 1500000, 1000000, 1500000, 4250000, 2000000, 2750000, 4950000, 400000, 650000, 700000)) }) test_that("averageBranchingFactor", { t <- CreateRegularTree(3, 3) expect_equal(t$averageBranchingFactor, 3) }) test_that("siblings", { data(acme) s <- acme$IT$siblings expect_equal(2, length(s)) nms <- unname(Get(s, "name")) expect_equal(c("Accounting", "Research"), nms) }) test_that("Distance", { data(acme) d <- Distance(FindNode(acme, "Outsource"), FindNode(acme, "Research")) expect_equal(d, 3) d <- Distance(FindNode(acme, "Outsource"), acme) expect_equal(d, 2) d <- Distance(acme, FindNode(acme, "Outsource")) expect_equal(d, 2) d <- Distance(FindNode(acme, "Outsource"), FindNode(acme, "Outsource")) expect_equal(d, 0) d <- Distance(FindNode(acme, "Outsource"), FindNode(acme, "Go agile")) expect_equal(d, 2) }) test_that("Distance large tree", { tree <- CreateRegularTree(6, 2) d <- Distance(FindNode(tree, "1.2.1.2.2.2"), FindNode(tree, "1.2.2.2.2.2")) expect_equal(d, 8) d <- Distance(FindNode(tree, "1.2.1.2.2.2"), FindNode(tree, "1.1.1.1.1.1")) expect_equal(d, 10) }) test_that("leaves", { data(acme) l <- acme$leaves expect_equal(7, length(l)) expect_equal(unname(sapply(l, function(x) x$name)), c("New Software", "New Accounting Standards", "New Product Line", "New Labs", "Outsource", "Go agile", "Switch to R")) l <- acme$IT$Outsource$leaves expect_equal(typeof(l), "list") expect_equal(length(l), 1) }) ================================================ FILE: tests/testthat/test-treeMethodsSideEffect.R ================================================ context("tree methods side effects") test_that("Sort", { data(acme) acme$Do(function(x) x$totalCost <- Aggregate(x, "cost", sum)) Sort(acme, "totalCost", decreasing = 
FALSE) get <- acme$Get('totalCost') exp <- c(4950000, 700000, 50000, 250000, 400000, 1500000, 500000, 1000000, 2750000, 750000, 2000000) names(exp) <- c('Acme Inc.', 'IT', 'Switch to R', 'Go agile', 'Outsource', 'Accounting', 'New Accounting Standards', 'New Software', 'Research', 'New Labs', 'New Product Line' ) expect_equal(get, exp) Sort(acme, "totalCost", decreasing = TRUE) get <- acme$Get('totalCost') expect_false(identical(all.equal(get, exp), TRUE)) }) test_that("Prune leaves", { data(acme) Prune(acme, function(x) is.null(x$cost) || x$cost < 1000000) expect_equal(acme$leafCount, 5) expect_equal(acme$totalCount, 9) expect_true(all(acme$Get("cost", filterFun = isLeaf) < 1000000)) }) test_that("Prune name", { data(acme) Prune(acme, function(x) x$name != "IT") expect_equal(acme$leafCount, 4) expect_equal(acme$totalCount, 7) expect_true(is.null(Climb(acme, "IT"))) }) ================================================ FILE: tests/testthat/test-util.R ================================================ context("util") test_that("createRegular", { lvls <- 3 children <- 4 t <- CreateRegularTree(height = 3, branchingFactor = 4) expect_equal(t$leafCount, children ^ (lvls - 1)) expect_equal(t$height, lvls) expect_true(all(t$Get(function(x) x$count, filterFun = isNotLeaf) == 4)) }) test_that("createRandomTree", { t <- CreateRandomTree(nodes = 100) expect_equal(t$totalCount, 101) }) test_that("PruneDist 1", { data(acme) acme1 <- data.tree:::PrintPruneDist(acme, limit = 1) expect_equal(acme1$totalCount, 2) expect_equal(acme1$children[[1]]$name, "... 3 nodes w/ 7 sub") }) test_that("PruneDist 2", { data(acme) acme1 <- data.tree:::PrintPruneDist(acme, limit = 5) expect_equal(acme1$totalCount, 8) expect_equal(acme1$IT$children[[1]]$name, "... 3 nodes w/ 0 sub") }) test_that("PruneSimple", { data(acme) acme1 <- data.tree:::PrintPruneSimple(acme, limit = 5) expect_equal(acme1$totalCount, 5) expect_equal(acme1$children[[2]]$name, "... 
2 nodes w/ 5 sub") }) ================================================ FILE: tests/testthat.R ================================================ library(testthat) test_check("data.tree") #devtools::test() ================================================ FILE: vignettes/applications.Rmd ================================================ --- title: "data.tree sample applications" author: "Christoph Glur" date: '`r Sys.Date()`' output: html_document: includes: before_body: applications.banner.html theme: cerulean toc: yes toc_depth: 2 word_document: default --- ```{r echo=F} ### get knitr just the way we like it knitr::opts_chunk$set( message = FALSE, warning = FALSE, error = FALSE, tidy = FALSE, cache = FALSE ) ``` # Introduction This vignette gives you a quick introduction to data.tree applications. We took care to keep the examples simple enough so non-specialists can follow them. The price for this is, obviously, that the examples are often simple compared to real-life applications. If you are using data.tree for things not listed here, and if you believe this is of general interest, then please do drop us a note, so we can include your application in a future version of this vignette. # World Population TreeMap (visualization) This example is inspired by the examples of the treemap package. You'll learn how to * convert a data.frame to a data.tree structure * navigate a tree and locate specific nodes * use `Aggregate` and `Cumulate` * manipulate an existing tree, e.g. by using the `Prune` method ## Original Example, to be improved The original example visualizes the world population as a tree map. ```{r} library(treemap) data(GNI2014) treemap(GNI2014, index=c("continent", "iso3"), vSize="population", vColor="GNI", type="value") ``` As there are many countries, the chart gets cluttered with many very small boxes. In this example, we will limit the number of countries and sum the remaining population in a catch-all country called "Other".
We use data.tree to do this aggregation. ## Convert from data.frame First, let's convert the population data into a data.tree structure: ```{r} library(data.tree) GNI2014$continent <- as.character(GNI2014$continent) GNI2014$pathString <- paste("world", GNI2014$continent, GNI2014$country, sep = "/") tree <- as.Node(GNI2014[,]) print(tree, pruneMethod = "dist", limit = 20) ``` We can also navigate the tree to find the population of a specific country. Luckily, RStudio is quite helpful with its code completion (use `CTRL + SPACE`): ```{r} tree$Europe$Switzerland$population ``` Or, we can look at a sub-tree: ```{r} northAm <- tree$`North America` Sort(northAm, "GNI", decreasing = TRUE) print(northAm, "iso3", "population", "GNI", limit = 12) ``` Or, we can find out what is the country with the largest GNI: ```{r} maxGNI <- Aggregate(tree, "GNI", max) #same thing, in a more traditional way: maxGNI <- max(sapply(tree$leaves, function(x) x$GNI)) tree$Get("name", filterFun = function(x) x$isLeaf && x$GNI == maxGNI) ``` ## Aggregate and Cumulate We aggregate the population. For non-leaves, this will recursively iterate through children, and cache the result in the `population` field. ```{r} tree$Do(function(x) { x$population <- Aggregate(node = x, attribute = "population", aggFun = sum) }, traversal = "post-order") ``` Next, we sort each node by population: ```{r} Sort(tree, attribute = "population", decreasing = TRUE, recursive = TRUE) ``` Finally, we cumulate among siblings, and store the running sum in an attribute called `cumPop`: ```{r} tree$Do(function(x) x$cumPop <- Cumulate(x, "population", sum)) ``` The tree now looks like this: ```{r} print(tree, "population", "cumPop", pruneMethod = "dist", limit = 20) ``` ## Prune The previous steps were done to define our threshold: big countries should be displayed, while small ones should be grouped together. 
This lets us define a pruning function that will allow a maximum of 7 countries per continent, and that will keep a country only as long as the cumulative population (the country itself included) stays below 90% of its continent's population. We would like to store the original number of countries for further use: ```{r} tree$Do(function(x) x$origCount <- x$count) ``` We are now ready to prune. This is done by defining a pruning function, returning 'FALSE' for all countries that should be combined: ```{r} myPruneFun <- function(x, cutoff = 0.9, maxCountries = 7) { if (isNotLeaf(x)) return (TRUE) if (x$position > maxCountries) return (FALSE) return (x$cumPop < (x$parent$population * cutoff)) } ``` We clone the tree, because we might want to play around with different parameters: ```{r} treeClone <- Clone(tree, pruneFun = myPruneFun) print(treeClone$Oceania, "population", pruneMethod = "simple", limit = 20) ``` Finally, we need to sum countries that we pruned away into a new "Other" node: ```{r} treeClone$Do(function(x) { missing <- x$population - sum(sapply(x$children, function(x) x$population)) other <- x$AddChild("Other") other$iso3 <- paste0("OTH(", x$origCount, ")") other$country <- "Other" other$continent <- x$name other$GNI <- 0 other$population <- missing }, filterFun = function(x) x$level == 2 ) print(treeClone$Oceania, "population", pruneMethod = "simple", limit = 20) ``` ## Plot ### Plotting the treemap In order to plot the treemap, we need to convert the data.tree structure back to a data.frame: ```{r} df <- ToDataFrameTable(treeClone, "iso3", "country", "continent", "population", "GNI") treemap(df, index=c("continent", "iso3"), vSize="population", vColor="GNI", type="value") ``` ### Plot as dendrogram Just for fun, and for no reason other than to demonstrate conversion to dendrogram, we can plot this in a very unusual way: ```{r} plot(as.dendrogram(treeClone, heightAttribute = "population")) ``` ## Further developments Obviously, we should also aggregate the GNI as a weighted average.
Namely, we should do this for the *OTH* catch-all countries that we add to the tree. # Portfolio Breakdown (finance) In this example, we show how to display an investment portfolio as a hierarchic breakdown into asset classes. You'll see: * how you can re-use a traversal * advanced use of `Aggregate` * how to add default attribute formatters to your tree ## Convert from data.frame ```{r} fileName <- system.file("extdata", "portfolio.csv", package="data.tree") pfodf <- read.csv(fileName, stringsAsFactors = FALSE) head(pfodf) ``` Let us convert the data.frame to a data.tree structure. Here, we use again the path string method. For other options, see `?as.Node.data.frame` ```{r} pfodf$pathString <- paste("portfolio", pfodf$AssetCategory, pfodf$AssetClass, pfodf$SubAssetClass, pfodf$ISIN, sep = "/") pfo <- as.Node(pfodf) ``` ## Aggregate To calculate the weight per asset class, we use the `Aggregate` method: ```{r} t <- Traverse(pfo, traversal = "post-order") Do(t, function(x) x$Weight <- Aggregate(node = x, attribute = "Weight", aggFun = sum)) ``` We now calculate the `WeightOfParent`, ```{r} Do(t, function(x) x$WeightOfParent <- x$Weight / x$parent$Weight) ``` Duration is a bit more complicated, as this is a concept that applies only to the fixed income asset class. Note that, in the second statement, we are reusing the traversal from above. ```{r} pfo$Do(function(x) x$Duration <- ifelse(is.null(x$Duration), 0, x$Duration), filterFun = isLeaf) Do(t, function(x) x$Duration <- Aggregate(x, function(x) x$WeightOfParent * x$Duration, sum)) ``` ## Formatters We can add default formatters to our data.tree structure. Here, we add them to the root, but we might as well add them to any Node in the tree. 
```{r} SetFormat(pfo, "WeightOfParent", function(x) FormatPercent(x, digits = 1)) SetFormat(pfo, "Weight", FormatPercent) FormatDuration <- function(x) { if (x != 0) res <- FormatFixedDecimal(x, digits = 1) else res <- "" return (res) } SetFormat(pfo, "Duration", FormatDuration) ``` These formatter functions will be used when printing a data.tree structure. ## Print ```{r} #Print print(pfo, "Weight", "WeightOfParent", "Duration", filterFun = function(x) !x$isLeaf) ``` # ID3 (machine learning) This example shows you the following: * How to build a data.tree structure in an algorithm * How to prune a tree * How to use data.tree to develop learning algorithms Thanks a lot for all the helpful comments made by Holger von Jouanne-Diedrich. Classification trees are very popular these days. If you have never come across them, you might be interested in [classification trees](http://en.wikipedia.org/wiki/Decision_tree_learning). These models let you *classify* observations (e.g. things, outcomes) according to the observations' qualities, called *features*. Essentially, all of these models consist of creating a *tree*, where each *node* acts as a *router*. You insert your mushroom *instance* at the *root* of the tree, and then, depending on the mushroom's *features* (size, points, color, etc.), you follow along a different *path*, until a *leaf* node spits out your mushroom's *class*, i.e. whether it's edible or not. There are two different steps involved in using such a model: *training* (i.e. constructing the tree), and *predicting* (i.e. using the tree to predict whether a given mushroom is poisonous). This example provides code to do both, using one of the very early algorithms to classify data according to discrete features: [ID3](http://en.wikipedia.org/wiki/ID3_algorithm). It lends itself well for this example, but of course today there are much more elaborate and refined algorithms available. 
## ID3 Introduction During the prediction step, each node routes our mushroom according to a feature. But how do we choose the feature? Should we first separate our set according to color or size? That is where classification models differ. In ID3, we pick, at each node, the feature with the highest *Information Gain*. In a nutshell, this is the feature which splits the sample into the *purest* possible subsets. For example, in the case of mushrooms, *dots* might be a more sensible feature than *organic*. ### Purity and Entropy ```{r} IsPure <- function(data) { length(unique(data[,ncol(data)])) == 1 } ``` The *entropy* is a measure of the purity of a dataset: a pure dataset has entropy 0, and the more mixed the classes are, the higher the entropy. ```{r} Entropy <- function( vls ) { res <- vls/sum(vls) * log2(vls/sum(vls)) res[vls == 0] <- 0 -sum(res) } ``` ### Information Gain Mathematically, the information gain IG is defined as: $$ IG(T,a) = H(T)-\sum_{v\in vals(a)}\frac{|\{\textbf{x}\in T|x_a=v\}|}{|T|} \cdot H(\{\textbf{x}\in T|x_a=v\}) $$ In words, the information gain measures the *difference* between the entropy *before the split*, and the weighted sum of the entropies *after the split*. So, let's rewrite that in R: ```{r} InformationGain <- function( tble ) { entropyBefore <- Entropy(colSums(tble)) s <- rowSums(tble) entropyAfter <- sum (s / sum(s) * apply(tble, MARGIN = 1, FUN = Entropy )) informationGain <- entropyBefore - entropyAfter return (informationGain) } ``` ## Training We are all set for the ID3 training algorithm. ### Pseudo code We start with the entire training data, and with a root. Then: 1. if the data-set is pure (e.g. all toxic), then 1. construct a leaf having the name of the class (e.g. 'toxic') 2. else 1. choose the feature with the highest information gain (e.g. 'color') 2. for each value of that feature (e.g. 'red', 'brown', 'green') 1. take the subset of the data-set having that feature value 2. construct a child node having the name of that feature value (e.g. 'red') 3.
call the algorithm recursively on the child node and the subset

### Implementation in R with the data.tree package

For the following implementation, we assume that the classifying features are in columns 1 to n-1, whereas the class (the edibility) is in the last column.

```{r}
# Recursively grows an ID3 decision tree below `node`.
#
# node: the Node to grow. It is modified in place: it receives the
#       attributes `obsCount` and `feature`, and children are added to it.
# data: data.frame with the classifying features in columns 1 to n-1 and
#       the class in the last column.
#
# Called for its side effects on `node`.
TrainID3 <- function(node, data) {

  node$obsCount <- nrow(data)

  #if the data-set is pure (e.g. all toxic), then
  if (IsPure(data)) {
    #construct a leaf having the name of the class (e.g. 'toxic')
    child <- node$AddChild(unique(data[, ncol(data)]))
    node$feature <- tail(names(data), 1)
    child$obsCount <- nrow(data)
    child$feature <- ''
  } else {
    #calculate the information gain of every remaining feature;
    #vapply keeps the feature names and guarantees a numeric vector
    ig <- vapply(
      colnames(data)[-ncol(data)],
      function(x) InformationGain(table(data[, x], data[, ncol(data)])),
      numeric(1)
    )
    #choose the feature with the highest information gain (e.g. 'color')
    #if more than one feature have the same information gain, then take
    #the first one
    feature <- names(which.max(ig))
    node$feature <- feature

    #take the subset of the data-set having that feature value
    childObs <- split(data[, names(data) != feature, drop = FALSE],
                      data[, feature],
                      drop = TRUE)

    #seq_along() rather than 1:length(): no iteration over an empty split
    for (i in seq_along(childObs)) {
      #construct a child having the name of that feature value (e.g. 'red')
      child <- node$AddChild(names(childObs)[i])

      #call the algorithm recursively on the child and the subset
      TrainID3(child, childObs[[i]])
    }
  }
}
```

### Training with data

Our training data looks like this:

```{r}
library(data.tree)
data(mushroom)
mushroom
```

Indeed, a bit small. But you get the idea.
We are ready to train our decision tree by running the function:

```{r}
tree <- Node$new("mushroom")
TrainID3(tree, mushroom)
print(tree, "feature", "obsCount")
```

## Prediction

### The prediction method

We need a predict function, which will route data through our tree and make a prediction based on the leaf where it ends up:

```{r}
#Routes one observation through a trained tree and returns the predicted class.
#  tree:     a node of the trained tree (initially the root)
#  features: named vector of feature values, e.g. c(color = 'red')
#Stops with an error if a required feature is missing from `features`, or if
#its value has no corresponding branch in the tree.
Predict <- function(tree, features) {
  firstChild <- tree$children[[1]]
  #a node whose first child is a leaf is a class node: the leaf's name is the class
  if (firstChild$isLeaf) return (firstChild$name)
  if (!tree$feature %in% names(features)) {
    stop("No value provided for feature '", tree$feature, "'")
  }
  value <- features[[tree$feature]]
  child <- tree$children[[value]]
  if (is.null(child)) {
    stop("Value '", value, "' of feature '", tree$feature,
         "' was not seen during training")
  }
  Predict(child, features)
}
```

### Using the prediction method

And now we use it to predict:

```{r}
Predict(tree, c(color = 'red',
                size = 'large',
                points = 'yes')
        )
```

Oops! Looks like trusting classification blindly might get you killed.

# Jenny Lind (decision tree, plotting)

This demo calculates and plots a simple decision tree. It demonstrates the following:

* how to read a yaml file into a data.tree structure
* how to calculate a decision tree
* how to plot a data.tree with the data.tree plotting facility

## Load YAML file

YAML is similar to JSON, but targeted towards humans (as opposed to computers). It's concise and easy to read. YAML can be a neat format to store your data.tree structures, as you can use it across different software and systems, you can edit it with any text editor, and you can even send it as an email.

This is how our YAML file looks:

```{r}
fileName <- system.file("extdata", "jennylind.yaml", package="data.tree")
cat(readChar(fileName, file.info(fileName)$size))
```

Let's convert the YAML into a data.tree structure. First, we load it with the yaml package into a list of lists. Then we use `as.Node` to convert the list into a data.tree structure:

```{r}
library(data.tree)
library(yaml)
lol <- yaml.load_file(fileName)
jl <- as.Node(lol)
print(jl, "type", "payoff", "p")
```

## Calculate

Next, we define our payoff function, and apply it to the tree.
Note that we use post-order traversal, meaning that we calculate the tree from leaf to root:

```{r}
#Sets node$payoff from the payoffs of the node's children:
#the expected value for chance nodes, the best alternative for decision nodes.
payoff <- function(node) {
  if (node$type == 'chance') {
    node$payoff <- sum(vapply(node$children, function(child) child$payoff * child$p, numeric(1)))
  } else if (node$type == 'decision') {
    node$payoff <- max(vapply(node$children, function(child) child$payoff, numeric(1)))
  }
}

jl$Do(payoff, traversal = "post-order", filterFun = isNotLeaf)
```

The decision function is the next step. Note that we filter on decision nodes:

```{r}
#Stores the name of the best child in x$decision.
#which.max picks the first child on ties, so x$decision is always a single name
#(a vector of names would break scalar comparisons against x$decision).
decision <- function(x) {
  po <- vapply(x$children, function(child) child$payoff, numeric(1))
  x$decision <- names(which.max(po))
}

jl$Do(decision, filterFun = function(x) x$type == 'decision')
```

## Plot

### Plot with the data.tree plotting facility

The data tree plotting facility uses GraphViz / DiagrammeR. You can provide a function as a style:

```{r}
#terminal nodes show their payoff, all other nodes their expected return (ER)
GetNodeLabel <- function(node) switch(node$type,
                                      terminal = paste0( '$ ', format(node$payoff, scientific = FALSE, big.mark = ",")),
                                      paste0('ER\n', '$ ', format(node$payoff, scientific = FALSE, big.mark = ",")))

#edges leaving a chance node additionally show their probability
GetEdgeLabel <- function(node) {
  if (!node$isRoot && node$parent$type == 'chance') {
    label <- paste0(node$name, " (", node$p, ")")
  } else {
    label <- node$name
  }
  return (label)
}

GetNodeShape <- function(node) switch(node$type, decision = "box", chance = "circle", terminal = "none")

SetEdgeStyle(jl, fontname = 'helvetica', label = GetEdgeLabel)
SetNodeStyle(jl, fontname = 'helvetica', label = GetNodeLabel, shape = GetNodeShape)
```

Note that the `fontname` is inherited as is by all children, whereas e.g. the `label` argument is a function, it's called on each inheriting child node.
Another alternative is to set the style per node:

```{r}
#colour the edge leading to the chosen alternative of each decision node;
#%in% keeps the condition scalar even if a decision holds several names
jl$Do(function(x) SetEdgeStyle(x, color = "red", inherit = FALSE),
      filterFun = function(x) !x$isRoot && x$parent$type == "decision" && x$name %in% x$parent$decision)
```

Finally, we direct our plot from left-to-right, and use the plot function to display:

```{r, eval = FALSE}
SetGraphStyle(jl, rankdir = "LR")
plot(jl)
```

![](assets/dtree.png)

# Bubble Chart (visualization)

In this example, we will replicate Mike Bostock's bubble example. See here for details: http://bl.ocks.org/mbostock/4063269. We use Joe Cheng's [bubbles](https://github.com/jcheng5/bubbles) package. All of this is inspired by [Timelyportfolio](https://github.com/timelyportfolio), the king of [htmlwidgets](http://www.htmlwidgets.org).

You'll learn how to convert a complex JSON into a data.frame, and how to use this to plot hierarchic visualizations.

## Load JSON file

The data represents the Flare class hierarchy, which is a code library for creating visualizations. The JSON is long, deeply nested, and complicated.

```{r}
fileName <- system.file("extdata", "flare.json", package="data.tree")
flareJSON <- readChar(fileName, file.info(fileName)$size)
cat(substr(flareJSON, 1, 300))
```

So, let's convert it into a data.tree structure:

```{r}
library(jsonlite)
#fromJSON reads directly from a file path; no connection object is needed
flareLoL <- fromJSON(fileName,
                     simplifyDataFrame = FALSE
                     )

flareTree <- as.Node(flareLoL, mode = "explicit", check = "no-warn")
flareTree$attributesAll
print(flareTree, "size", limit = 30)
```

Finally, we can convert it into a data.frame. The `ToDataFrameTable` only converts leaves, but inherits attributes from ancestors:

```{r}
flare_df <- ToDataFrameTable(flareTree,
                             className = function(x) x$parent$name,
                             packageName = "name",
                             "size")
head(flare_df)
```

This does not look spectacular.
But take a look at this [stack overflow](http://stackoverflow.com/questions/31339805/converting-json-format-to-csv-to-upload-data-table-in-r-to-produce-d3-bubble-cha) question to see how people struggle to do this type of operation.

Here, it was particularly simple, because the underlying JSON structure is regular. If it were not (e.g. some nodes contain different attributes than others), the conversion from JSON to data.tree would still work. And then, as a second step, we could modify the data.tree structure before converting it into a data.frame. For example, we could use `Prune` and `Remove` to remove unwanted nodes, use `Set` to remove or add default values, etc.

## Plot

What follows has nothing to do with data.tree anymore. We simply provide the bubble chart printing for your enjoyment. In order to run it yourself, you need to install the bubbles package from github:

```{r, eval = FALSE}
#install the bubbles package from github, at a fixed ref
devtools::install_github("jcheng5/bubbles@6724e43f5e")
library(scales)
library(bubbles)
library(RColorBrewer)
bubbles(
  flare_df$size,
  #label: the first two characters of the name
  substr(flare_df$packageName, 1, 2),
  tooltip = flare_df$packageName,
  #col_factor() builds a mapping from className to the 9 "Set1" colours,
  #which is then applied to each row's className
  color = col_factor(
    brewer.pal(9,"Set1"),
    factor(flare_df$className)
  )(flare_df$className),
  height = 800,
  width = 800
)
```

![](assets/bubbles.jpg)

# File Explorer (system utilities)

In this example, we print the files that exist in the folder structure of the file system. As a special goodie, we'll show code that lets you build your own *R File Explorer*, an interactive tree / list widget that lets you expand folders and browse through your file system.

## Print

First, let's read the files in a directory tree into R. In this example, the root path ".." is the parent of the `vignettes` folder, i.e. the data.tree package folder itself:

```{r}
path <- ".."
files <- list.files(path = path,
                    recursive = TRUE,
                    include.dirs = FALSE)

#file.path is vectorized over `files`, so no explicit loop is needed;
#the "data.tree" prefix becomes the root of the path used for the conversion
df <- data.frame(
      filename = file.path("data.tree", files),
      file.info(file.path(path, files)),
      stringsAsFactors = FALSE
    )

print(head(df)[c(1,2,3,4)], row.names = FALSE)
```

We now convert this into a data.tree:

```{r}
fileStructure <- as.Node(df, pathName = "filename")
#average number of files per folder
fileStructure$leafCount / (fileStructure$totalCount - fileStructure$leafCount)
print(fileStructure, "mode", "size", limit = 25)
```

## Listviewer html widget

Finally, we can display the files by timelyportfolio's listviewer. As it's not on CRAN, we only display a screenshot of the widget in this vignette. This is not half as fun as the interactive widget, of course. So please try it out for yourself to see it in action.

```{r, eval = FALSE}
#This requires listviewer, which is available only on github
devtools::install_github("timelyportfolio/listviewer")

library(listviewer)

l <- ToListSimple(fileStructure)
jsonedit(l)
```

![](assets/listviewer.jpg)

(Run the code yourself to see the widget in action)

# Gene Defect (genetics, probabilities, multi-generation models)

This is a simplistic example from the area of genetics. Similar models are found in many fields, namely wherever you have multi-generation models and probabilities. The code generates 100 simulations of a 3 generation population. Individuals can inherit or develop a certain feature (e.g. colour blindness). The probability to develop the feature is based on sex. We then plot the probability distribution of the feature in the last generation.

You'll learn how to build a data.tree structure according to probabilistic rules, and how to use the structure to infer a probability distribution.

## Algorithm

First, we generate a family tree of a population exhibiting a certain feature (e.g. colour blindness).
```{r}
#' Recursively generates a family tree in which a feature is inherited or developed.
#'
#' @param children the number of children each population member has
#' @param probSex the probability of the sex of a descendant
#' @param probInherit the probability the feature is inherited, depending on the sex of the descendant
#' @param probDevelop the probability the feature is developed (e.g. a gene defect), depending on the sex
#' of the descendant
#' @param generations the number of generations our simulated population should have.
#' Children are always added to `parent`; the recursion continues on them while `generations > 0`
#' @param parent for recursion
#' @return the `parent` Node (the root of the generated tree on the initial call)
GenerateChildrenTree <- function(children = 2,
                                 probSex = c(male = 0.52, female = 0.48),
                                 probInherit = c(male = 0.8, female = 0.5),
                                 probDevelop = c(male = 0.05, female = 0.01),
                                 generations = 3,
                                 parent = NULL) {

  if (is.null(parent)) {
    parent <- Node$new("1")
    parent$sex <- 1
    parent$feature <- TRUE
    parent$develop <- FALSE
  }

  #sex of descendants
  #1 = male
  #2 = female
  sex <- sample.int(n = 2, size = children, replace = TRUE, prob = probSex)
  for (i in seq_len(children)) child <- parent$AddChild(i)
  Set(parent$children, sex = sex)

  #Traverse(parent, ...) also visits the parent itself. Its feature / develop
  #state was already decided one generation earlier and must not be drawn
  #again, so we restrict the sub-populations to the newly created children.
  IsChildOfSex <- function(x, s) !identical(x, parent) && x$sex == s

  #inherit
  if (parent$feature == TRUE) {
    for (i in 1:2) {
      subPop <- Traverse(parent, filterFun = function(x) IsChildOfSex(x, i))
      inherit <- sample.int(n = 2,
                            size = length(subPop),
                            replace = TRUE,
                            prob = c(1 - probInherit[i], probInherit[i]))

      Set(subPop, feature = as.logical(inherit - 1))
    }
  } else {
    Set(parent$children, feature = FALSE)
  }

  #develop
  Set(parent$children, develop = FALSE)
  for (i in 1:2) {
    subPop <- Traverse(parent, filterFun = function(x) IsChildOfSex(x, i) && !x$feature)
    develop <- sample.int(n = 2,
                          size = length(subPop),
                          replace = TRUE,
                          prob = c(1 - probDevelop[i], probDevelop[i]))
    Set(subPop, feature = as.logical((develop - 1)), develop = as.logical((develop - 1)))
  }

  #recursion to next generation
  if (generations > 0) for (i in seq_len(children)) GenerateChildrenTree(children,
                                                                         probSex,
                                                                         probInherit,
                                                                         probDevelop,
                                                                         generations - 1,
                                                                         parent$children[[i]])

  return (parent)
}
```

## Analysis

Just for demonstration purpose, this is what a tree looks like:

```{r}
tree <- GenerateChildrenTree()
print(tree, "sex", "feature", "develop", limit = 20)
```

How big is our population after three generations?

```{r}
tree$totalCount
```

For a given tree, how many have the feature?

```{r}
length(Traverse(tree, filterFun = function(x) x$feature))
```

How many males have developed the feature without inheritance?

```{r}
length(Traverse(tree, filterFun = function(x) x$sex == 1 && x$develop))
```

What is the occurrence of the feature in the last generation?

```{r}
#share of the leaves (i.e. the last generation) having the feature
FreqLastGen <- function(tree) {
  l <- tree$leaves
  mean(vapply(l, function(x) x$feature, logical(1)))
}

FreqLastGen(tree)
```

## Simulation

Generate 100 sample trees and get the frequency of the feature in the last generation

```{r}
system.time(x <- vapply(1:100, function(x) FreqLastGen(GenerateChildrenTree()), numeric(1)))
```

Plot a histogram of the frequency of the defect in the last generation:

```{r}
hist(x, probability = TRUE, main = "Frequency of feature in last generation")
```

For larger populations, you might consider parallelisation, of course. See below for some hints.

## Parallelisation

It is straightforward to parallelise the simulation. If, as in this example, you do not need to pass around a data.tree structure from one process (fork) to another, it is also rather efficient.

```{r, eval = FALSE}
library(foreach)
library(doParallel)
#keep a handle on the cluster, so that we can stop its workers when done
cl <- makeCluster(3)
registerDoParallel(cl)
#On Linux, there are other alternatives, e.g.: library(doMC); registerDoMC(3)

system.time(x <- foreach (i = 1:100, .packages = "data.tree") %dopar% FreqLastGen(GenerateChildrenTree()))
#stopImplicitCluster() would only stop a cluster created implicitly by
#registerDoParallel(); an explicitly created cluster needs stopCluster()
stopCluster(cl)
```

```{r, echo = FALSE}
print(c(user = 0.07, system = 0.02, elapsed = 1.40))
```

For the more complicated case where you want to parallelise operations on a single tree, see below.

# Tic-Tac-Toe (game complexity)

In this example, we do a brute force solution of Tic-Tac-Toe, the well-known 3x3 game.

You'll learn how data.tree can be used to build a tree of game history, and how the resulting data.tree structure can be used to analyze the game.
In addition, this example shows you how parallelisation can speed up data.tree.

We want to set up the problem in a way such that each `Node` is a move of a player, and each path describes the entire history of a game.

We number the fields from 1 to 9. Additionally, for easy readability, we label the Nodes in an Excel-like manner, such that field 9, say, is 'c3':

```{r}
fields <- expand.grid(letters[1:3], 1:3)
fields
```

To speed up things a bit, we consider rotation, so that, say, the first move in a3 and a1 are considered equal, because they could be achieved with a 90 degree rotation of the board. This leaves us with only a3, b3, and b2 for the first move of player 1:

```{r}
ttt <- Node$new("ttt")

#consider rotation, so first move is explicit
ttt$AddChild("a3")
ttt$a3$f <- 7
ttt$AddChild("b3")
ttt$b3$f <- 8
ttt$AddChild("b2")
ttt$b2$f <- 5

ttt$Set(player = 1, filterFun = isLeaf)
```

## Game play

Now we recurse through the tree, and add possible moves to the leaves, growing it eventually to hold all possible games. To do this, we define a method which, based on a `Node's` path, adds possible moves as children.

```{r}
#Adds one child per still available field to `node`, and recurses on each
#child until a player has won or the board is full. Returns `node`.
AddPossibleMoves <- function(node) {
  #the fields taken so far are those on the path from the root to `node`
  t <- Traverse(node, traversal = "ancestor", filterFun = isNotRoot)

  available <- rownames(fields)[!rownames(fields) %in% Get(t, "f")]
  for (f in available) {
    child <- node$AddChild(paste0(fields[f, 1], fields[f, 2]))
    child$f <- as.numeric(f)
    #players alternate
    child$player <- if (node$player == 1) 2 else 1
    hasWon <- HasWon(child)
    if (!hasWon && child$level <= 10) AddPossibleMoves(child)
    if (hasWon) {
      child$result <- child$player
      print(paste("Player ", child$player, "wins!"))
    } else if(child$level == 10) {
      #level 10 = root + 9 moves: the board is full and nobody has won
      child$result <- 0
      print("Tie!")
    }

  }
  return (node)
}
```

Note that we store additional info along the way. For example, in the line `child$player <- if (node$player == 1) 2 else 1`, the player is derived from the parent `Node`, and set as an attribute in the `Node`.
## Exit Criteria

Our algorithm stops whenever either player has won, or when all 9 fields are taken. Whether a player has won is determined by this function:

```{r}
#Returns TRUE if the player who made the move `node` owns a complete row,
#column, or diagonal, considering all moves on the path from the root to `node`.
HasWon <- function(node) {
  t <- Traverse(node, traversal = "ancestor", filterFun = function(x) !x$isRoot && x$player == node$player)
  mine <- Get(t, "f")
  mineV <- rep(0, 9)
  mineV[mine] <- 1
  mineM <- matrix(mineV, 3, 3, byrow = TRUE)
  result <- any(rowSums(mineM) == 3) ||
    any(colSums(mineM) == 3) ||
    sum(diag(mineM)) == 3 ||
    #anti-diagonal (fields 3, 5, 7): reverse the column order first. Note that
    #transposing would not help, as t(m) has the same main diagonal as m
    sum(diag(mineM[, 3:1])) == 3
  return (result)
}
```

## Tree creation

The following code plays all possible games. Depending on your computer, this might take a few minutes:

<!-- NOTE(review): the timings and counts recorded in the echo = FALSE chunks below (and the aggregated results in the Parallelisation section) appear to stem from a run in which anti-diagonal wins were not detected. Please re-run the eval = FALSE chunks and update the recorded figures. -->

```{r, eval=FALSE}
system.time(for (child in ttt$children) AddPossibleMoves(child))
```

```{r, echo= FALSE}
c(user = 345.645, system = 3.245, elapsed = 346.445)
```

## Analysis

What is the total number of games?

```{r, eval = FALSE}
ttt$leafCount
```

```{r, echo = FALSE}
89796
```

How many nodes (moves) does our tree have?

```{r, eval = FALSE}
ttt$totalCount
```

```{r, echo = FALSE}
203716
```

What is the average length of a game?

```{r, eval = FALSE}
mean(ttt$Get(function(x) x$level - 1, filterFun = isLeaf))
```

```{r, echo = FALSE}
8.400775
```

What is the average branching factor?

```{r, eval = FALSE}
ttt$averageBranchingFactor
```

```{r, echo = FALSE}
1.788229
```

How many games were won by each player?

```{r, eval = FALSE}
winnerOne <- Traverse(ttt, filterFun = function(x) x$isLeaf && x$result == 1)
winnerTwo <- Traverse(ttt, filterFun = function(x) x$isLeaf && x$result == 2)
ties <- Traverse(ttt, filterFun = function(x) x$isLeaf && x$result == 0)

c(winnerOne = length(winnerOne), winnerTwo = length(winnerTwo), ties = length(ties))
```

```{r, echo=FALSE}
c(winnerOne = 39588, winnerTwo = 21408, ties = 28800)
```

We can, for example, look at any Node, using the `PrintBoard` function.
This function prints the game history:

```{r}
#Returns the board after the move `node` as a 3x3 character matrix.
#Each occupied cell holds the player (X or O) followed by the number of the move;
#free cells hold "0".
PrintBoard <- function(node) {
  mineV <- rep(0, 9)

  t <- Traverse(node, traversal = "ancestor", filterFun = function(x) !x$isRoot && x$player == 1)
  field <- Get(t, "f")
  value <- Get(t, function(x) paste0("X", x$level - 1))
  mineV[field] <- value

  t <- Traverse(node, traversal = "ancestor", filterFun = function(x) !x$isRoot && x$player == 2)
  field <- Get(t, "f")
  value <- Get(t, function(x) paste0("O", x$level - 1))
  mineV[field] <- value

  mineM <- matrix(mineV, 3, 3, byrow = TRUE)
  #filled by row, fields 1, 2, 3 (a1, b1, c1) make up the first row: as in
  #Excel, rows are numbered and columns are lettered
  rownames(mineM) <- as.character(1:3)
  colnames(mineM) <- letters[1:3]
  mineM
}
```

The letter denotes the player (X for player 1, O for player 2). The number denotes the move (1 to 9):

```{r, eval = FALSE}
PrintBoard(ties[[1]])
```

```{r, echo = FALSE}
mt <- matrix(c("O2", "X3", "O4", "X5", "O6", "X7", "X1", "O8", "X9"), nrow = 3, ncol = 3, byrow = TRUE)
rownames(mt) <- as.character(1:3)
colnames(mt) <- letters[1:3]
mt
```

Exercise: Do the same for Chess!

## Parallelisation

Here, the parallelisation is more challenging than in the [Gene Defect](#GeneDefect) example above. The reason is that we have only one tree, albeit a big one. So we need a strategy to do what we call intra-tree parallelisation.

In a perfect world, data.tree and intra-tree parallelisation would tell a love story: Many operations are recursive, and can be called equally well on a subtree or on an entire tree. Therefore, it is very natural to delegate the calculation of multiple sub-trees to different processes.

For example, tic-tac-toe seems almost trivial to parallelise: Remember that, on level 2, we created manually 3 `Nodes`. The creation of the sub-trees on these `Nodes` will be completely independent of the other sub-trees. Then, each sub-tree can be created in its own process.

So, in theory, we could use any parallelisation mechanism available in R. Unfortunately, you need to take into account a few things.
As a matter of fact, to pass the sub-trees from a fork process back to the main process, R needs to serialize the `Nodes` of the sub-tree, and this results in huge objects. As a result, collecting the sub-trees would take ages.

So, instead, we can

1. create the sub-trees, each in its own process
2. run the analysis in the child process
3. return the result of the analysis to the main process
4. aggregate the results

```{r, eval=FALSE}
#Builds the game tree below `subtree` and returns the number of games won by
#each player, and the number of ties, as a named vector.
AnalyseTicTacToe <- function(subtree) {
  # 1. create sub-tree
  AddPossibleMoves(subtree)
  # 2. run the analysis
  winnerOne <- Traverse(subtree, filterFun = function(x) x$isLeaf && x$result == 1)
  winnerTwo <- Traverse(subtree, filterFun = function(x) x$isLeaf && x$result == 2)
  ties <- Traverse(subtree, filterFun = function(x) x$isLeaf && x$result == 0)
  res <- c(winnerOne = length(winnerOne),
           winnerTwo = length(winnerTwo),
           ties = length(ties))
  # 3. return the result
  return(res)
}

library(foreach)
library(doParallel)
#keep a handle on the cluster, so that we can stop its workers when done
cl <- makeCluster(3)
registerDoParallel(cl)
#On Linux, there are other alternatives, e.g.: library(doMC); registerDoMC(3)

system.time(
  x <- foreach (child = ttt$children,
                .packages = "data.tree") %dopar% AnalyseTicTacToe(child)
)
```

```{r, echo= FALSE}
c(user = 0.05, system = 0.04, elapsed = 116.86)
```

```{r, eval = FALSE}
#stopImplicitCluster() would only stop a cluster created implicitly by
#registerDoParallel(); an explicitly created cluster needs stopCluster()
stopCluster(cl)

# 4. aggregate results
rowSums(sapply(x, c))
```

```{r, echo=FALSE}
c(winnerOne = 39588, winnerTwo = 21408, ties = 28800)
```

================================================
FILE: vignettes/applications.banner.html
================================================
Banner

================================================
FILE: vignettes/data.tree.Rmd
================================================
---
title: "Introduction to data.tree"
author: "Christoph Glur"
date: '`r Sys.Date()`'
output:
  html_document:
    includes:
      before_body: intro.banner.html
    self_contained: yes
    theme: cerulean
    toc: yes
    toc_depth: 2
  pdf_document:
    toc: yes
    toc_depth: 2
---

```{r echo=F}
### get knitr just the way we like it

knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  error = FALSE,
  tidy = FALSE,
  cache = FALSE
)
```

# Introduction

## Trees

Trees are ubiquitous in mathematics, computer science, data sciences, finance, and in many other fields. Trees are especially useful when we are facing *hierarchical data*. For example, trees are used:

* in decision theory (cf. decision trees)
* in machine learning (e.g. classification trees)
* in finance, e.g. to classify financial instruments into asset classes
* in routing algorithms
* in computer science and programming (e.g. binary search trees, XML)
* e.g. for family trees

For more details, see the applications vignette by typing `vignette("applications", package = "data.tree")`

## Trees in R

Tree-like structures are already used in R. For example, environments can be seen as nodes in a tree. And CRAN provides numerous packages that deal with tree-like structures, especially in the area of decision theory. Yet, there is no general purpose hierarchical data structure that could be used as conveniently and generically as, say, `data.frame`.

As a result, people often try to resolve hierarchical problems in a tabular fashion, for instance with data.frames. But often, hierarchies don't marry with tables, and various workarounds are usually required.

## Trees in `data.tree`

This package offers an alternative. The `data.tree` package lets you create hierarchies, called `data.tree` **structures**. The building block of these structures are `Node` objects.
The package provides basic traversal, search, and sort operations, and an infrastructure for recursive tree programming. You can decorate `Nodes` with your own attributes and methods, so as to extend the package to your needs.

The package also provides convenience methods for neatly printing and plotting trees. It supports conversion from and to `data.frames`, `lists`, and other tree structures such as `dendrogram`, `phylo` objects from the ape package, `igraph`, and other packages.

Technically, `data.tree` structures are bi-directional, ordered trees. Bi-directional means that you can navigate from parent to children and vice versa. Ordered means that the sort order of the children of a parent node is well-defined.

# `data.tree` basics

## Definitions

* __`data.tree` structure__: a _tree_, consisting of multiple `Node` objects. Often, the entry point to a `data.tree` structure is the _root Node_
* __`Node`__: both a class and the basic building block of `data.tree` structures
* __attribute__: an active, a field, or a method. **Not to be confused with standard R attributes**, c.f. `?attr`, which have a different meaning. Many methods and functions have an `attribute` arg, which can refer to an active, a field or a method. For example, see `?Get`
* __active__ (sometimes called property): a field on a `Node` that can be called like an attribute, but behaves like a function without arguments. For example: `node$position`
* __field__: a named value on a `Node`, e.g. `node$cost <- 2500`
* __method__: a function acting on an object (on a `Node` in this context). Many methods are available in OO style (e.g. `node$Revert()`) or in traditional style (`Revert(node)`)
* __inheritance__: in this context, inheritance refers to a situation in which a child `Node` inherits e.g. an attribute from one of its ancestors. For example, see `?Get`, `?SetNodeStyle`

## Tree creation

There are different ways to create a `data.tree` structure.
For example, you can create a tree **programmatically**, by **conversion** from other R objects, or from a **file**.

### Create a tree programmatically

Let's start by creating a tree programmatically. We do this by creating `Node` objects, and linking them together so as to define the parent-child relationships.

In this example, we are looking at a company, Acme Inc., and the tree reflects its organisational structure. The root (level 1) is the company. On level 2, the nodes represent departments, and the leaves of the tree represent projects that the company is considering for next year:

```{r}
library(data.tree)

#level 1: the company (root)
acme <- Node$new("Acme Inc.")
  #level 2: departments; level 3: projects (leaves)
  accounting <- acme$AddChild("Accounting")
    software <- accounting$AddChild("New Software")
    standards <- accounting$AddChild("New Accounting Standards")
  research <- acme$AddChild("Research")
    newProductLine <- research$AddChild("New Product Line")
    newLabs <- research$AddChild("New Labs")
  it <- acme$AddChild("IT")
    outsource <- it$AddChild("Outsource")
    agile <- it$AddChild("Go agile")
    goToR <- it$AddChild("Switch to R")

print(acme)
```

As you can see from the previous example, each `Node` is identified by its *name*, i.e. the argument you pass into the `Node$new(name)` constructor. The name needs to be *unique* among siblings, such that paths to `Nodes` are unambiguous.

`Node` inherits from the `R6` reference class. This has the following implications:

1. You can call methods on a `Node` in OO style, e.g. `acme$Get("name")`
2. `Node` exhibits *reference semantics*. Thus, multiple variables in R can point to the same `Node`, and modifying a `Node` will modify it for all referencing variables. In the above code example, both `acme$IT` and `it` reference the same object. This is different from the *value semantics*, which is much more widely used in R.

### Create a tree from a `data.frame`

Creating a tree programmatically is useful especially in the context of algorithms. However, most times you will create a tree by conversion.
This could be by conversion from a nested list-of-lists, by conversion from another R tree-structure (e.g. an ape `phylo`), or by conversion from a `data.frame`. For more details on all the options, type `?as.Node` and refer to the *See Also* section.

One of the most common conversions is the one from a `data.frame` in table format. The following code illustrates this. We load the GNI2014 data from the treemap package. This `data.frame` is in table format, meaning that each row will represent a *leaf* in the `data.tree` structure:

```{r}
library(treemap)
data(GNI2014)
head(GNI2014)
```

Let's convert that into a `data.tree` structure! We start by defining a *pathString*. The pathString describes the hierarchy by defining a path from the root to each leaf. In this example, the hierarchy comes very naturally:

```{r}
#one path per row, of the form world/<continent>/<country>
GNI2014$pathString <- paste("world",
                            GNI2014$continent,
                            GNI2014$country,
                            sep = "/")
```

Once our pathString is defined, conversion to Node is very easy:

```{r}
population <- as.Node(GNI2014)
print(population, "iso3", "population", "GNI", limit = 20)
```

This is a simple example, and more options are available. Type `?FromDataFrameTable` for all the details.

### Create a tree from a file

Often, trees are created from one of many file formats. When developing this package, we opted for a multi-step approach, meaning that you first import the file into one of the well-known R data structures. Then you convert these into a `data.tree` structure. For example, typical import patterns could be:

* csv -> data.frame in table format (`?read.csv`) -> data.tree (`?as.Node.data.frame`)
* Newick -> ape phylo (`?ape::read.tree`) -> data.tree (`?as.Node.phylo` )
* csv -> data.frame in network format (`?read.csv`) -> data.tree (c.f. `?FromDataFrameNetwork`)
* yaml -> list of lists (`?yaml::yaml.load`) -> data.tree (`?as.Node.list`)
* json -> list of lists (e.g.
`?jsonlite::fromJSON`) -> data.tree (`?as.Node.list`)

If you have a choice, we recommend you consider yaml format to store and share your hierarchies. It is concise, human-readable, and very easy to convert to a data.tree. An example is provided here for illustration. The data represents what platforms and OS versions a group of students use:

```{r}
library(yaml)
yaml <- "
name: OS Students 2014/15
OS X:
  Yosemite:
    users: 16
  Leopard:
    users: 43
Linux:
  Debian:
    users: 27
  Ubuntu:
    users: 36
Windows:
  W7:
    users: 31
  W8:
    users: 32
  W10:
    users: 4
"

osList <- yaml.load(yaml)
osNode <- as.Node(osList)
print(osNode, "users")
```

In cases where your leaf elements have no attributes, you might want to interpret them as nodes, and not as attributes. In such cases, you can use `interpretNullAsList = TRUE` to convert these into `Nodes` (instead of attributes). For example:

```{r}
library(yaml)
yaml <- "
name: OS Students 2014/15
OS X:
  Yosemite:
  Leopard:
Linux:
  Debian:
  Ubuntu:
Windows:
  W7:
  W8:
  W10:
"

osList <- yaml.load(yaml)
osNode <- as.Node(osList, interpretNullAsList = TRUE)
#box-drawing characters (U+2500, U+2502, U+2514, U+251C) used when printing this tree
osNode$printFormatters <- list(h = "\u2500" , v = "\u2502", l = "\u2514", j = "\u251C")
print(osNode, "users")
```

## Node methods

As seen above, a `data.tree` structure is composed of `Node` objects, and the entry point to a `data.tree` structure is always a `Node`, often the *root* `Node` of a tree.

There are different types of methods:

* OO-style actives (sometimes called properties) on `Nodes`, such as e.g. `Node$isRoot`
* OO-style methods on `Nodes`, such as e.g. `Node$AddChild(name)`
* Classical R methods, such as e.g. `Clone(node)`.

### Actives Examples (aka Properties)

Actives look and feel like attributes, but they are dynamically evaluated. They are documented in the `Node` documentation, which is accessed by typing `?Node`.
Remember our population example:

```{r}
print(population, limit = 15)
population$isRoot
population$height
population$count
population$totalCount
population$attributes
population$attributesAll
population$averageBranchingFactor
```

The naming convention of the package is that attributes and actives are lower case, whereas methods are upper / CamelCase. RStudio and other IDEs work well with `data.tree`. If you have a `Node`, simply type `myNode$ + SPACE` to get a list of available attributes, actives and methods.

### OO-Style Methods Examples

Examples of OO-Style methods

You will find more information on these examples below.

Get will traverse the tree and collect specific values for the `Nodes` it traverses:

```{r}
sum(population$Get("population", filterFun = isLeaf))
```

Prune traverses the tree and keeps only the subtrees for which the pruneFun returns TRUE.

```{r}
#keep all inner nodes, and those leaves having a population of more than 1 million
Prune(population, pruneFun = function(x) !x$isLeaf || x$population > 1000000)
```

Note that the Prune function has side-effects, as it acts on the original population object. The population sum is now smaller:

```{r}
sum(population$Get("population", filterFun = isLeaf), na.rm = TRUE)
```

### Traditional R Methods

```{r}
popClone <- Clone(acme)
```

Traditional S3 generics are available especially for conversion:

```{r}
as.data.frame(acme)
```

Though there is also a more specialised non-generic version:

```{r}
ToDataFrameNetwork(acme)
```

## Climbing a tree (tree navigation)

To *climb* a tree means to navigate to a specific `Node` in the `data.tree` structure.

### Navigation by path

The most natural form of climbing a tree is to climb by path:

```{r}
acme$IT$Outsource
acme$Research$`New Labs`
```

### Navigation by position

However, there are a number of other ways to get to a specific `Node`.
We can access the children of a `Node` directly through `Node$children`:

```{r}
acme$children[[1]]$children[[2]]$name
```

### Navigation by attributes

Furthermore, we can not only navigate by name, but also by other attributes. This is achieved with the `Climb` method. The name of each `...` argument designates the field, and the value matches against `Nodes`. Each argument refers to the subsequent level to climb. In this example, `Climb` takes acme's child at position 1 (i.e. `Accounting`), then it takes `Accounting's` child called `New Software`:

```{r}
acme$Climb(position = 1, name = "New Software")$path
```

As a shortcut, you can climb multiple levels with a single argument:

```{r}
tree <- CreateRegularTree(5, 5)
tree$Climb(position = c(2, 3, 4))$path
```

Finally, you can even combine. The following example starts on the root, then looks for child at position 2, then for its child at position 3. Next, we move to the child having name = "1.2.3.4", and finally its child having name "1.2.3.4.5":

```{r}
tree$Climb(position = c(2, 3), name = c("1.2.3.4", "1.2.3.4.5"))$path
```

## Custom attributes

Just as with, say, a `list`, we can add any custom field to any `Node` in a `data.tree` structure.
Let's go back to our acme company: ```{r} acme ``` We now add costs and probabilities to the projects in each department: ```{r} acme$Accounting$`New Software`$cost <- 1000000 acme$Accounting$`New Accounting Standards`$cost <- 500000 acme$Research$`New Product Line`$cost <- 2000000 acme$Research$`New Labs`$cost <- 750000 acme$IT$Outsource$cost <- 400000 acme$IT$`Go agile`$cost <- 250000 acme$IT$`Switch to R`$cost <- 50000 acme$Accounting$`New Software`$p <- 0.5 acme$Accounting$`New Accounting Standards`$p <- 0.75 acme$Research$`New Product Line`$p <- 0.25 acme$Research$`New Labs`$p <- 0.9 acme$IT$Outsource$p <- 0.2 acme$IT$`Go agile`$p <- 0.05 acme$IT$`Switch to R`$p <- 1 print(acme, "cost", "p") ``` Note that there is a list of reserved names you cannot use as `Node` attributes: ```{r} NODE_RESERVED_NAMES_CONST ``` ### Custom attributes in constructor An alternative, often convenient way to assign custom attributes is in the constructor, or in the `Node$AddChild` method: ```{r} birds <- Node$new("Aves", vulgo = "Bird") birds$AddChild("Neognathae", vulgo = "New Jaws", species = 10000) birds$AddChild("Palaeognathae", vulgo = "Old Jaws", species = 60) print(birds, "vulgo", "species") ``` ### Custom attributes as function Nothing stops you from setting a function as a field. This calculates a value dynamically, i.e. whenever a field is accessed in tree traversal. For example, you can add a new `Node` to your structure, and the function will reflect this. Think of this as a hierarchical spreadsheet, in which you can set formulas into cells. Consider the following example: ```{r} birds$species <- function(self) sum(sapply(self$children, function(x) x$species)) print(birds, "species") ``` data.tree maps the `self` argument to the `Node` at hand. Thus, you must name the argument `self`. Now, let's assume we discover a new species. 
Then, the species on the root adjusts dynamically: ```{r} birds$Palaeognathae$species <- 61 print(birds, "species") ``` This, together with the `Set` method and recursion, becomes a very powerful tool, as we'll see later. ## Printing ### Basic Printing Basic printing is easy, as you have surely noticed in the previous sections. `print` displays a tree in a tree-grid view. On the left, you have the hierarchy. Then you have a column per variable you want to print: ```{r} print(acme, "cost", "p") ``` For more advanced printing, you have a few options. ### Formatters You can use *formatters* to output a variable in a certain way. You can use formatters in two ways: * You can set them on a `Node` using the `SetFormat` method. If you do this, then the formatter will be picked up as a default formatter whenever you `print`, `Get`, convert to `data.frame`, etc. Formatters can be set on any `Node` in a `data.tree` structure and act on any descendant. So you can overwrite a formatter for a sub-tree. * You can add an explicit ad-hoc formatter to the `Get` method (see below). This will overwrite default formatters previously set via the `SetFormat` method. You can also set the formatter to `identity` to void a default formatter. Setting a formatter using the `SetFormat` method: ```{r} SetFormat(acme, "p", formatFun = FormatPercent) SetFormat(acme, "cost", formatFun = function(x) FormatFixedDecimal(x, digits = 2)) print(acme, "cost", "p") ``` ### Printing using `Get` Formatting with the `Get` method overwrites any formatters found along the path: ```{r} data.frame(cost = acme$Get("cost", format = function(x) FormatFixedDecimal(x, 2)), p = acme$Get("p", format = FormatPercent)) ``` ## Plotting ### `plot` `data.tree` is mainly a data structure. As it is easy to convert `data.tree` structures to other formats, you have access to a large number of tools to plot a `data.tree` structure. For example, you can plot a `data.tree` structure as a dendrogram, as an ape tree, as a treeview, etc.
Additionally, `data.tree` also provides its own plotting facility. It is built on GraphViz/DiagrammeR, and you can access these features via the `plot` and `ToGraphViz` functions. Note that DiagrammeR is not required to use data.tree, so `plot` only works if DiagrammeR is installed on your system. For example: ```{r, eval = FALSE} plot(acme) ``` ![acme](assets/acme.png) ### Styling Similar to formatters for printing, you can style your tree and store the styling directly in the tree, for later use: ```{r, eval = FALSE} SetGraphStyle(acme, rankdir = "TB") SetEdgeStyle(acme, arrowhead = "vee", color = "grey35", penwidth = 2) SetNodeStyle(acme, style = "filled,rounded", shape = "box", fillcolor = "GreenYellow", fontname = "helvetica", tooltip = GetDefaultTooltip) SetNodeStyle(acme$IT, fillcolor = "LightBlue", penwidth = "5px") plot(acme) ``` ![acme](assets/acmestyle.png) For details on the styling attributes, see http://graphviz.org/Documentation.php . Note that, by default, most Node style attributes will be inherited. Though, for example, `label` will not be inherited. However, inheritance can be avoided for all style attributes, as for the Accounting node in the following example: ```{r, eval = FALSE} SetNodeStyle(acme$Accounting, inherit = FALSE, fillcolor = "Thistle", fontcolor = "Firebrick", tooltip = "This is the accounting department") plot(acme) ``` ![acme](assets/acmestyle2.png) Use `Do` to set style on specific nodes: ```{r, eval = FALSE} Do(acme$leaves, function(node) SetNodeStyle(node, shape = "egg")) plot(acme) ``` ![acme](assets/acmestyle3.png) ### Other Visualisations However, there are also endless other possibilities to visualise `data.tree` structures. There are more examples in the applications vignette. Type `vignette('applications', package = "data.tree")`. 
#### Dendrogram For example, using dendrogram: ```{r} plot(as.dendrogram(CreateRandomTree(nodes = 20)), center = TRUE) ``` #### igraph Or, using igraph: ```{r echo=FALSE } library(igraph, quietly = TRUE, warn.conflicts = FALSE, verbose = FALSE) ``` ```{r} library(igraph) plot(as.igraph(acme, directed = TRUE, direction = "climb")) ``` #### networkD3 Or, using networkD3: (you can actually touch these thingies and drag them around, don't be shy!) ```{r} library(networkD3) acmeNetwork <- ToDataFrameNetwork(acme, "name") simpleNetwork(acmeNetwork[-3], fontSize = 12) ``` Another example, which at the same time shows conversion from csv: ```{r} fileName <- system.file("extdata", "useR15.csv", package="data.tree") useRdf <- read.csv(fileName, stringsAsFactors = FALSE) #define the hierarchy (Session/Room/Speaker) useRdf$pathString <- paste("useR", useRdf$session, useRdf$room, useRdf$speaker, sep="|") #convert to Node useRtree <- as.Node(useRdf, pathDelimiter = "|") #plot with networkD3 useRtreeList <- ToListExplicit(useRtree, unname = TRUE) radialNetwork( useRtreeList) ``` ## Tree Conversion In order to take advantage of the R eco-system, you can convert your `data.tree` structure to other oft-used data types. The general rule is that, for each target type, there is a one-does-it-all generic, and a few more specialised conversion functions. For example, in order to convert a `data.tree` to a data.frame, you can either use `as.data.frame.Node`, or `ToDataFrameTree`, `ToDataFrameTable`, or `ToDataFrameNetwork`. The documentation for all of these variations is accessible via `?as.data.frame.Node`. ### Converting to `data.frame` As you saw just above, creating a `data.frame` is easy.
Again, note that we always call such methods on the root `Node` of a `data.tree` structure, or on the root `Node` of a subtree: ```{r} acmedf <- as.data.frame(acme) as.data.frame(acme$IT) ``` The same can be achieved by using the more specialised method: ```{r, eval=FALSE} ToDataFrameTree(acme) ``` We can also add field values of the `Nodes` as columns to the `data.frame`: ```{r} ToDataFrameTree(acme, "level", "cost") ``` Note that it is not required that the field is set on each and every `Node`. Other data frame conversions are: ```{r} ToDataFrameTable(acme, "pathString", "cost") ``` ```{r} ToDataFrameNetwork(acme, "cost") ``` And, finally, we can also put attributes of our nodes in a column, based on a type discriminator. This sounds more complicated than it is. Consider the default discriminator, `level`: ```{r} ToDataFrameTypeCol(acme, 'cost') ``` Let's look at a somewhat more advanced example. First, let's assume that for the outsourcing project, we have two separate possibilities: Outsourcing to India or outsourcing to Poland: ```{r} acme$IT$Outsource$AddChild("India") acme$IT$Outsource$AddChild("Poland") ``` Now, with this slightly more complex tree structure, the level is not a useful discriminator anymore, because some projects are in level 3, while the new projects are in level 4. For this reason, we introduce a type field on our node objects: A node type can be a company (root only), a department (Accounting, Research, and IT), a program (Outsource), or a project (the rest, i.e. all the leaves): ```{r} acme$Set(type = c('company', 'department', 'project', 'project', 'department', 'project', 'project', 'department', 'program', 'project', 'project', 'project', 'project')) ``` Our tree now looks like this: ```{r} print(acme, 'type') ``` We can now create a data.frame in which we have one column per distinct type value. Namely, a company column, a department column, a program column, and a project column.
Note that the columns are not hardcoded, but derived dynamically from your data in the tree structure: ```{r} ToDataFrameTypeCol(acme, type = 'type', prefix = NULL) ``` ### Converting to List of Lists Lists of lists are useful for various use cases: * as an intermediate step in converting to JSON, XML, YAML * for functions that take a list of lists (lol) as an input. This is especially the case for visualisations and charts, e.g. with many html widgets * to save a `data.tree` structure as an R object (see performance considerations below) ```{r} data(acme) str(as.list(acme$IT)) str(ToListExplicit(acme$IT, unname = FALSE, nameName = "id", childrenName = "dependencies")) ``` ### Converting to other objects There are also conversions to igraph objects, to phylo / ape, to dendrogram, and others. For details, see `?as.phylo.Node`, `?as.dendrogram.Node`, `?as.igraph.Node`. # Tree Traversal Tree traversal is one of the core concepts of trees. See, for example, here: [Tree Traversal on Wikipedia](http://en.wikipedia.org/wiki/Tree_traversal). ## `Get` The `Get` method traverses the tree and collects values from each node. It then returns a vector or a list, containing the collected values. Additional features of the `Get` method are: * execute a function on each node, and append the function's result to the returned vector * execute a `Node` method on each node, and append the method's return value to the returned vector ### Traversal order The `Get` method can traverse the tree in various ways. This is called **traversal order**. #### Pre-Order The default traversal mode is **pre-order**. ![pre-order](assets/preorder.png) This is what is used e.g.
in `print`: ```{r} print(acme, "level") ``` #### Post-Order The **post-order** traversal mode returns children first, returning parents only after all its children have been traversed and returned: ![post-order](assets/postorder.png) We can use it like this on the `Get` method: ```{r} acme$Get('level', traversal = "post-order") ``` This is useful if your parent's value depends on the children, as we'll see below. #### Ancestor This is a non-standard traversal mode that does not traverse the entire tree. Instead, the ancestor mode starts from a `Node`, then walks the tree along the path from parent to parent, up to the root. ```{r} data.frame(level = agile$Get('level', traversal = "ancestor")) ``` ### Filter and Prune You can add a filter and/or a prune function to the `Get` method. These functions have to take a `Node` as an input, and return `TRUE` if the `Node` should be considered, and `FALSE` otherwise. The difference between the `pruneFun` and the `filterFun` is that filters act only on specific nodes, whereas if the `pruneFun` returns `FALSE`, then the entire sub-tree spanned by the `Node` is ignored. For example: ```{r} acme$Get('name', pruneFun = function(x) x$position <= 2) ``` There are also some convenient filter functions available in the package, such as `isLeaf`, `isRoot`, `isNotLeaf`, etc. ```{r} acme$Get('name', filterFun = isLeaf) ``` ### Attributes The `attribute` parameter determines what is collected. This is called `attribute`, but it should not be confused with R's concept of object attributes (e.g. `?attributes`). In this context, an attribute can be either: * the name of a `Node` field * the name of a `Node` method or active * a function, whose first argument must be a Node Throughout this document, we refer to `attribute` in this sense. #### Field ```{r} acme$Get('name') ``` #### Method You can pass a standard R function to the `Get` method (and thus to `print`, `as.data.frame`, etc.). 
The only requirement this function must satisfy is that its first argument be of class `Node`. Subsequent arguments can be added through the ellipsis (...). For example: ```{r} ExpectedCost <- function(node, adjustmentFactor = 1) { return ( node$cost * node$p * adjustmentFactor) } acme$Get(ExpectedCost, adjustmentFactor = 0.9, filterFun = isLeaf) ``` #### Using recursion Recursion comes naturally with data.tree, and it is one of its core strengths: ```{r} Cost <- function(node) { result <- node$cost if(length(result) == 0) result <- sum(sapply(node$children, Cost)) return (result) } print(acme, "p", cost = Cost) ``` There is a built-in function that would make this example even simpler: `Aggregate`. It is explained below. ## `Do` Do is similar to `Get` in that it also traverses a tree in a specific traversal order. However, instead of fetching an attribute, it will (surprise!) do something, namely run a function. For example, we can tell the `Do` method to assign a value to each `Node` it traverses. This is especially useful if the attribute parameter is a function, as in the previous examples. For instance, we can store the aggregated cost for later use and printing: ```{r} acme$Do(function(node) node$cost <- Cost(node), filterFun = isNotLeaf) print(acme, "p", "cost") ``` ## `Set` The `Set` method is the counterpart to the `Get` method. The `Set` method takes a vector or a single value as an input, and traverses the tree in a certain order. Each `Node` is assigned a value from the vector, one after the other, recycling. ### Assigning values ```{r} acme$Set(id = 1:acme$totalCount) print(acme, "id") ``` The `Set` method can take multiple vectors as an input, and, optionally, you can define the name of the attribute. Finally, just as for the `Get` method, the **traversal order** is important for the `Set`. 
```{r} secretaries <- c(3, 2, 8) employees <- c(52, 43, 51) acme$Set(secretaries, emps = employees, filterFun = function(x) x$level == 2) print(acme, "emps", "secretaries", "id") ``` ### Deleting attributes The `Set` method can also be used to assign a single value directly to all `Nodes` traversed. For example, to remove the `avgExpectedCost`, we assign `NULL` on each node, using the fact that the `Set` recycles: ```{r} acme$Set(avgExpectedCost = NULL) ``` However, note that setting a field to `NULL` will not remove it for good. You will still see it: ```{r} acme$attributesAll ``` In order to remove it completely, you can use the `RemoveAttribute` method: ```{r} acme$Do(function(node) node$RemoveAttribute("avgExpectedCost")) ``` ### Using Set and function assignment Earlier, we saw that we can add a function dynamically to a `Node`. We can, of course, also do this via the `Set` method: ```{r} acme$Set(cost = c(function(self) sum(sapply(self$children, function(child) GetAttribute(child, "cost")))), filterFun = isNotLeaf) print(acme, "cost") acme$IT$AddChild("Paperless", cost = 240000) print(acme, "cost") ``` ## `Traverse` and explicit traversal Previously, we have used the `Get`, `Set` and `Do` methods in their OO-style version. This is often very convenient for quick access to variables. However, sometimes you want to re-use the same traversal for multiple sequential operations. For this, you can use what is called **explicit traversal**. It works like so: ```{r} traversal <- Traverse(acme, traversal = "post-order", filterFun = function(x) x$level == 2) Set(traversal, floor = c(1, 2, 3)) Do(traversal, function(x) { if (x$floor <= 2) { x$extension <- "044" } else { x$extension <- "043" } }) Get(traversal, "extension") ``` # Advanced Features ## `Aggregate` The `Aggregate` method provides a shorthand for the oft-used case when a parent is the aggregate of its child values, as seen in the previous example. `Aggregate` calls a function recursively on children.
If a child holds the attribute, that value is returned. Otherwise, the attribute is collected from all children, and aggregated using the `aggFun`. For example: ```{r} Aggregate(node = acme, attribute = "cost", aggFun = sum) ``` We can also use this in the `Get` method, of course: ```{r, eval=FALSE} acme$Get(Aggregate, "cost", sum) ``` Note, however, that this is not very efficient: `Aggregate` will be called twice on, say, *IT*: Once when the traversal passes *IT* itself, the second time recursively when `Aggregate` is called on the root. For this reason, we have the option to store/cache the calculated value along the way. For one thing, this is a convenient way to save an additional `Set` call in case we want to store the aggregated value. Additionally, it speeds up calculation because `Aggregate` on an ancestor will use a cached value on a descendant: ```{r} acme$Do(function(node) node$cost <- Aggregate(node, attribute = "cost", aggFun = sum), traversal = "post-order") print(acme, "cost") ``` ## `Cumulate` In its simplest form, the `Cumulate` function just sums up an attribute value along siblings, taking into consideration all siblings before the `Node` on which `Cumulate` is called: ```{r} Cumulate(acme$IT$`Go agile`, "cost", sum) ``` Or, to find the minimum cost among siblings: ```{r} Cumulate(acme$IT$`Go agile`, "cost", min) ``` This can be useful in combination with traversal, e.g. to calculate a running sum among siblings. Specifically, the `cacheAttribute` lets you store the running sum in a field. This not only speeds up calculation, but lets you re-use the calculated values later: ```{r} acme$Do(function(node) node$cumCost <- Cumulate(node, attribute = "cost", aggFun = sum)) print(acme, "cost", "cumCost") ``` ## `Clone` As stated above, `Nodes` exhibit reference semantics. If you call, say, `Set`, then this changes the `Nodes` in the tree. The changes will be visible for all variables having a reference on the `data.tree` structure. 
As a consequence, you might want to "save away" the current state of a structure. To do this, you can `Clone` an entire tree: ```{r} acmeClone <- Clone(acme) acmeClone$name <- "New Acme" # acmeClone does not point to the same reference object anymore: acme$name == acmeClone$name ``` ## `Sort` With the `Sort` method, you can sort an entire tree, a sub-tree, or children of a specific `Node`. The method will sort recursively and sort children with respect to a child attribute. As explained earlier, the child attribute can be a function or a method. ```{r} Sort(acme, "name") acme Sort(acme, Aggregate, "cost", sum, decreasing = TRUE, recursive = TRUE) print(acme, "cost", aggCost = acme$Get(Aggregate, "cost", sum)) ``` ## `Prune` You can prune sub-trees out of a tree, thereby removing an entire sub-tree from a tree. There are two variations of this: * *temporary* pruning, e.g. just for printing: This is the `pruneFun` parameter, e.g. in `Get` * *side effect* or *permanent* pruning, meaning that you modify your `data.tree` structure for good. This is achieved with the `Prune` method. Consider the following example of permanent pruning: ```{r} acme$Do(function(x) x$cost <- Aggregate(x, "cost", sum)) Prune(acme, function(x) x$cost > 700000) print(acme, "cost") ``` # Performance Considerations ## CPU The `data.tree` package has been built to work with hierarchical data, to support visualization, to foster rapid prototyping, and for other applications where development time saved is more important than computing time lost. Having said this, it becomes clear that big data and `data.tree` do not marry particularly well. Don't expect R to build your `data.tree` structure with a few million `Nodes` during your cigarette break. Do not try to convert a gigabyte JSON document to a `data.tree` structure in a testthat test case. However, if you respect the following guidelines, I promise that you and your `Nodes` will have a lot of fun together. So here it goes: 1.
Creating a `Node` is relatively expensive. `CreateRegularTree(6, 6)` creates a `data.tree` structure with 9331 `Nodes`. On an AWS c4.large instance, this takes about 2.5 seconds. 2. `Clone` is similar to `Node` creation, with an extra penalty of about 50%. 3. Traversing (`Traverse`, `Get`, `Set` and `Do`) is relatively cheap. This is really what you would expect. `data.tree` builds on R6, i.e. reference objects. There is an overhead in creating them, as your computer needs to manage the references they hold. However, performing operations that change your tree (e.g. `Prune` or `Set`) are often faster than value semantics, as your computer does not need to copy the entire object in memory. Just to give you an order of magnitude: The following times are achieved on an AWS c4.large instance: ```{r, eval = FALSE} system.time(tree <- CreateRegularTree(6, 6)) ``` ```{r, echo = FALSE} c(user = 2.499, system = 0.009, elapsed = 2.506) ``` ```{r, eval = FALSE} system.time(tree <- Clone(tree)) ``` ```{r, echo = FALSE} c(user = 3.704, system = 0.023, elapsed = 3.726) ``` ```{r, eval = FALSE} system.time(traversal <- Traverse(tree)) ``` ```{r, echo = FALSE} c(user = 0.096, system = 0.000, elapsed = 0.097) ``` ```{r, eval = FALSE} system.time(Set(traversal, id = 1:tree$totalCount)) ``` ```{r, echo = FALSE} c(user = 0.205, system = 0.000, elapsed = 0.204) ``` ```{r, eval = FALSE} system.time(ids <- Get(traversal, "id")) ``` ```{r, echo = FALSE} c(user = 0.569, system = 0.000, elapsed = 0.569) ``` ```{r, eval = FALSE} leaves <- Traverse(tree, filterFun = isLeaf) Set(leaves, leafId = 1:length(leaves)) system.time(Get(traversal, function(node) Aggregate(node, "leafId", max))) ``` ```{r, echo = FALSE} c(user = 1.418, system = 0.000, elapsed = 1.417) ``` With caching, you can save some time: ```{r, eval = FALSE} system.time(tree$Get(function(node) Aggregate(tree, "leafId", max, "maxLeafId"), traversal = "post-order")) ``` ```{r, echo = FALSE} c(user = 0.69, system = 0.00, elapsed = 
0.69) ``` ## Memory data.tree structures have a relatively large memory footprint. However, for every-day applications using modern computers, this will not normally have an impact on your work **except when saving a `data.tree` structure to disk**. For an explanation why that is the case, you might want to read this answer on [Stack Overflow](http://stackoverflow.com/questions/13912867/empty-r-environment-becomes-large-file-when-saved). Depending on your development environment, you might want to turn off the option to save the workspace to .RData on exit. ================================================ FILE: vignettes/intro.banner.html ================================================ Banner

Abraham Lincoln once said, "Give me six hours to chop down a tree and I will spend the first four sharpening the axe."
Aunt Margaret used to say, "If you dream of a forest, you'd better learn how to plant a tree."
data.tree says, "No matter if you are a lumberjack or a tree hugger. I will be your sanding block, and I will be your seed."