Repository: mllg/batchtools Branch: main Commit: ee7080fc31de Files: 320 Total size: 1.6 MB Directory structure: gitextract_lqp2x_vj/ ├── .Rbuildignore ├── .aspell/ │ ├── batchtools.rds │ └── defaults.R ├── .editorconfig ├── .github/ │ ├── dependabot.yaml │ └── workflows/ │ ├── pkgdown.yml │ └── r-cmd-check.yml ├── .gitignore ├── .ignore ├── .lintr ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R/ │ ├── Algorithm.R │ ├── ExperimentRegistry.R │ ├── Export.R │ ├── Hooks.R │ ├── Job.R │ ├── JobCollection.R │ ├── JobNames.R │ ├── JobTables.R │ ├── Joins.R │ ├── Logs.R │ ├── Problem.R │ ├── RDSReader.R │ ├── Registry.R │ ├── Tags.R │ ├── Worker.R │ ├── addExperiments.R │ ├── batchMap.R │ ├── batchMapResults.R │ ├── batchReduce.R │ ├── btlapply.R │ ├── chunkIds.R │ ├── clearRegistry.R │ ├── clusterFunctions.R │ ├── clusterFunctionsDocker.R │ ├── clusterFunctionsHyperQueue.R │ ├── clusterFunctionsInteractive.R │ ├── clusterFunctionsLSF.R │ ├── clusterFunctionsMulticore.R │ ├── clusterFunctionsOpenLava.R │ ├── clusterFunctionsSGE.R │ ├── clusterFunctionsSSH.R │ ├── clusterFunctionsSlurm.R │ ├── clusterFunctionsSocket.R │ ├── clusterFunctionsTORQUE.R │ ├── config.R │ ├── doJobCollection.R │ ├── estimateRuntimes.R │ ├── execJob.R │ ├── files.R │ ├── findJobs.R │ ├── getDefaultRegistry.R │ ├── getErrorMessages.R │ ├── getStatus.R │ ├── helpers.R │ ├── ids.R │ ├── killJobs.R │ ├── loadRegistry.R │ ├── loadResult.R │ ├── mergeRegistries.R │ ├── reduceResults.R │ ├── removeExperiments.R │ ├── removeRegistry.R │ ├── resetJobs.R │ ├── runOSCommand.R │ ├── saveRegistry.R │ ├── sleep.R │ ├── submitJobs.R │ ├── summarizeExperiments.R │ ├── sweepRegistry.R │ ├── syncRegistry.R │ ├── testJob.R │ ├── unwrap.R │ ├── updateRegisty.R │ ├── waitForFiles.R │ ├── waitForJobs.R │ └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── docs/ │ ├── 404.html │ ├── CNAME │ ├── LICENSE-text.html │ ├── articles/ │ │ ├── batchtools.html │ │ ├── batchtools_files/ │ │ │ └── 
header-attrs-2.4/ │ │ │ └── header-attrs.js │ │ └── index.html │ ├── authors.html │ ├── bootstrap-toc.css │ ├── bootstrap-toc.js │ ├── docsearch.css │ ├── docsearch.js │ ├── index.html │ ├── news/ │ │ └── index.html │ ├── pkgdown.css │ ├── pkgdown.js │ ├── pkgdown.yml │ └── reference/ │ ├── JobCollection.html │ ├── JobExperiment.html │ ├── JobNames.html │ ├── JoinTables.html │ ├── Tags.html │ ├── Worker.html │ ├── addAlgorithm.html │ ├── addExperiments.html │ ├── addProblem.html │ ├── assertRegistry.html │ ├── batchExport.html │ ├── batchMap.html │ ├── batchMapResults.html │ ├── batchReduce.html │ ├── batchtools-deprecated.html │ ├── batchtools-package.html │ ├── btlapply.html │ ├── cfBrewTemplate.html │ ├── cfHandleUnknownSubmitError.html │ ├── cfKillJob.html │ ├── cfReadBrewTemplate.html │ ├── chunk.html │ ├── chunkIds.html │ ├── clearRegistry.html │ ├── doJobCollection.html │ ├── estimateRuntimes.html │ ├── execJob.html │ ├── findConfFile.html │ ├── findJobs.html │ ├── findTemplateFile.html │ ├── getDefaultRegistry.html │ ├── getErrorMessages.html │ ├── getJobTable.html │ ├── getStatus.html │ ├── grepLogs.html │ ├── index.html │ ├── killJobs.html │ ├── loadRegistry.html │ ├── loadResult.html │ ├── makeClusterFunctions.html │ ├── makeClusterFunctionsDocker.html │ ├── makeClusterFunctionsInteractive.html │ ├── makeClusterFunctionsLSF.html │ ├── makeClusterFunctionsMulticore.html │ ├── makeClusterFunctionsOpenLava.html │ ├── makeClusterFunctionsSGE.html │ ├── makeClusterFunctionsSSH.html │ ├── makeClusterFunctionsSlurm.html │ ├── makeClusterFunctionsSocket.html │ ├── makeClusterFunctionsTORQUE.html │ ├── makeExperimentRegistry.html │ ├── makeRegistry.html │ ├── makeSubmitJobResult.html │ ├── reduceResults.html │ ├── reduceResultsList.html │ ├── removeExperiments.html │ ├── removeRegistry.html │ ├── resetJobs.html │ ├── runHook.html │ ├── runOSCommand.html │ ├── saveRegistry.html │ ├── showLog.html │ ├── submitJobs.html │ ├── summarizeExperiments.html │ ├── 
sweepRegistry.html │ ├── syncRegistry.html │ ├── testJob.html │ ├── unwrap.html │ └── waitForJobs.html ├── inst/ │ ├── CITATION │ ├── bin/ │ │ └── linux-helper │ └── templates/ │ ├── lsf-simple.tmpl │ ├── openlava-simple.tmpl │ ├── sge-simple.tmpl │ ├── slurm-dortmund.tmpl │ ├── slurm-lido3.tmpl │ ├── slurm-simple.tmpl │ ├── testJob.tmpl │ └── torque-lido.tmpl ├── man/ │ ├── JobCollection.Rd │ ├── JobExperiment.Rd │ ├── JobNames.Rd │ ├── JoinTables.Rd │ ├── Tags.Rd │ ├── Worker.Rd │ ├── addAlgorithm.Rd │ ├── addExperiments.Rd │ ├── addProblem.Rd │ ├── assertRegistry.Rd │ ├── batchExport.Rd │ ├── batchMap.Rd │ ├── batchMapResults.Rd │ ├── batchReduce.Rd │ ├── batchtools-package.Rd │ ├── btlapply.Rd │ ├── cfBrewTemplate.Rd │ ├── cfHandleUnknownSubmitError.Rd │ ├── cfKillJob.Rd │ ├── cfReadBrewTemplate.Rd │ ├── chunk.Rd │ ├── clearRegistry.Rd │ ├── doJobCollection.Rd │ ├── estimateRuntimes.Rd │ ├── execJob.Rd │ ├── findConfFile.Rd │ ├── findJobs.Rd │ ├── findTemplateFile.Rd │ ├── getDefaultRegistry.Rd │ ├── getErrorMessages.Rd │ ├── getJobTable.Rd │ ├── getStatus.Rd │ ├── grepLogs.Rd │ ├── killJobs.Rd │ ├── loadRegistry.Rd │ ├── loadResult.Rd │ ├── makeClusterFunctions.Rd │ ├── makeClusterFunctionsDocker.Rd │ ├── makeClusterFunctionsHyperQueue.Rd │ ├── makeClusterFunctionsInteractive.Rd │ ├── makeClusterFunctionsLSF.Rd │ ├── makeClusterFunctionsMulticore.Rd │ ├── makeClusterFunctionsOpenLava.Rd │ ├── makeClusterFunctionsSGE.Rd │ ├── makeClusterFunctionsSSH.Rd │ ├── makeClusterFunctionsSlurm.Rd │ ├── makeClusterFunctionsSocket.Rd │ ├── makeClusterFunctionsTORQUE.Rd │ ├── makeExperimentRegistry.Rd │ ├── makeRegistry.Rd │ ├── makeSubmitJobResult.Rd │ ├── reduceResults.Rd │ ├── reduceResultsList.Rd │ ├── removeExperiments.Rd │ ├── removeRegistry.Rd │ ├── resetJobs.Rd │ ├── runHook.Rd │ ├── runOSCommand.Rd │ ├── saveRegistry.Rd │ ├── showLog.Rd │ ├── submitJobs.Rd │ ├── summarizeExperiments.Rd │ ├── sweepRegistry.Rd │ ├── syncRegistry.Rd │ ├── testJob.Rd │ ├── unwrap.Rd │ 
└── waitForJobs.Rd ├── man-roxygen/ │ ├── expreg.R │ ├── id.R │ ├── ids.R │ ├── missing.val.R │ ├── more.args.R │ ├── ncpus.R │ ├── nodename.R │ ├── reg.R │ └── template.R ├── paper/ │ ├── codemeta.json │ ├── paper.bib │ └── paper.md ├── src/ │ ├── Makevars │ ├── binpack.c │ ├── count_not_missing.c │ ├── fill_gaps.c │ ├── init.c │ └── lpt.c ├── tests/ │ ├── testthat/ │ │ ├── helper.R │ │ ├── test_Algorithm.R │ │ ├── test_ClusterFunctionHyperQueue.R │ │ ├── test_ClusterFunctions.R │ │ ├── test_ClusterFunctionsMulticore.R │ │ ├── test_ClusterFunctionsSSH.R │ │ ├── test_ClusterFunctionsSocket.R │ │ ├── test_ExperimentRegistry.R │ │ ├── test_Job.R │ │ ├── test_JobCollection.R │ │ ├── test_JobNames.R │ │ ├── test_Problem.R │ │ ├── test_Registry.R │ │ ├── test_addExperiments.R │ │ ├── test_batchMap.R │ │ ├── test_batchReduce.R │ │ ├── test_btlapply.R │ │ ├── test_chunk.R │ │ ├── test_convertIds.R │ │ ├── test_count.R │ │ ├── test_doJobCollection.R │ │ ├── test_estimateRuntimes.R │ │ ├── test_export.R │ │ ├── test_findConfFile.R │ │ ├── test_findJobs.R │ │ ├── test_foreach.R │ │ ├── test_future.R │ │ ├── test_getErrorMessages.R │ │ ├── test_getJobTable.R │ │ ├── test_getStatus.R │ │ ├── test_grepLogs.R │ │ ├── test_hooks.R │ │ ├── test_joins.R │ │ ├── test_killJobs.R │ │ ├── test_manual.R │ │ ├── test_memory.R │ │ ├── test_mergeRegistries.R │ │ ├── test_parallelMap.R │ │ ├── test_reduceResults.R │ │ ├── test_removeExperiments.R │ │ ├── test_removeRegistry.R │ │ ├── test_resetJobs.R │ │ ├── test_runOSCommand.R │ │ ├── test_seed.R │ │ ├── test_showLog.R │ │ ├── test_sleep.R │ │ ├── test_submitJobs.R │ │ ├── test_summarizeExperiments.R │ │ ├── test_sweepRegistry.R │ │ ├── test_tags.R │ │ ├── test_testJob.R │ │ ├── test_unwrap.R │ │ └── test_waitForJobs.R │ └── testthat.R └── vignettes/ ├── batchtools.Rmd ├── function_overview.tex └── tikz_prob_algo_simple.tex ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .Rbuildignore ================================================ ^LICENSE$ ^src/.+\.o$ ^src/.+\.so$ \.swp$ ^\.ignore$ ^\.editorconfig$ ^man-roxygen$ ^.*\.Rproj$ ^\.Rproj\.user$ ^docs$ ^paper$ ^_pkgdown\.yml$ ^README.RMD$ ^.github$ registry/ ^\.lintr$ ================================================ FILE: .aspell/defaults.R ================================================ Rd_files <- vignettes <- R_files <- description <- list(encoding = "UTF-8", language = "en", dictionaries = c("en_stats", "batchtools")) ================================================ FILE: .editorconfig ================================================ # See http://editorconfig.org root = true [*] charset = utf-8 end_of_line = lf insert_final_newline = true indent_style = space trim_trailing_whitespace = true [*.{r,R}] indent_size = 2 [*.{c,h}] indent_size = 4 [*.{cpp,hpp}] indent_size = 4 [{NEWS,DESCRIPTION,LICENSE}] max_line_length = 80 ================================================ FILE: .github/dependabot.yaml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" ================================================ FILE: .github/workflows/pkgdown.yml ================================================ # pkgdown workflow of the mlr3 ecosystem v0.1.0 # https://github.com/mlr-org/actions on: push: branches: - main pull_request: branches: - main release: types: - published workflow_dispatch: name: pkgdown jobs: pkgdown: runs-on: ubuntu-latest concurrency: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} TORCH_INSTALL: 1 steps: - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r@v2 - name: Install system dependencies if: runner.os == 'Linux' run: | while read -r cmd do eval sudo $cmd done < <(Rscript -e 
'writeLines(remotes::system_requirements("ubuntu", "20.04"))') sudo apt-get install -y libopenmpi-dev openmpi-bin - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::pkgdown, local::. needs: website - name: Install template run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate") shell: Rscript {0} - name: Build site run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} - name: Deploy if: github.event_name != 'pull_request' uses: JamesIves/github-pages-deploy-action@v4.7.3 with: clean: false branch: gh-pages folder: docs ================================================ FILE: .github/workflows/r-cmd-check.yml ================================================ # r cmd check workflow of the mlr3 ecosystem v0.2.0 # https://github.com/mlr-org/actions on: workflow_dispatch: inputs: debug_enabled: type: boolean description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' required: false default: false push: branches: - main pull_request: branches: - main schedule: - cron: '0 4 * * 1' name: r-cmd-check jobs: r-cmd-check: runs-on: ${{ matrix.config.os }} name: ${{ matrix.config.os }} (${{ matrix.config.r }}) strategy: fail-fast: false matrix: config: - {os: ubuntu-latest, r: 'devel'} - {os: ubuntu-latest, r: 'release'} - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true _R_CHECK_FORCE_SUGGESTS_: 0 RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - uses: r-lib/actions/setup-pandoc@v2 - name: Install OpenMPI (macOS) if: runner.os == 'macOS' run: | brew install open-mpi - name: Install system dependencies if: runner.os == 'Linux' run: | while read -r cmd do eval sudo $cmd done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') sudo apt-get 
install -y libopenmpi-dev openmpi-bin - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck needs: check - uses: r-lib/actions/check-r-package@v2 - uses: mxschmitt/action-tmate@v3 if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} with: limit-access-to-actor: true ================================================ FILE: .gitignore ================================================ .DS_Store /inst/doc /src/*.so /src/*.o /vignettes/*.html /vignettes/*.pdf /.Rproj.user /batchtools.Rproj *.tar.gz *.Rcheck README.html ================================================ FILE: .ignore ================================================ man/ docs/ ================================================ FILE: .lintr ================================================ linters: linters_with_defaults( # lintr defaults: https://lintr.r-lib.org/reference/default_linters.html # the following setup changes/removes certain linters assignment_linter = NULL, # do not force using <- for assignments object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names commented_code_linter = NULL, # allow code in comments line_length_linter(200L), object_length_linter(40L), undesirable_function_linter(fun = c( # base messaging cat = "use catf()", stop = "use stopf()", warning = "use warningf()", message = "use messagef()", # perf ifelse = "use fifelse()", rank = "use frank()" )) ) ================================================ FILE: DESCRIPTION ================================================ Package: batchtools Title: Tools for Computation on Batch Systems Version: 0.9.18 Authors@R: c( person("Michel", "Lang", , "michellang@gmail.com", role = c("cre", "aut"), comment = c(ORCID = "0000-0001-9754-0393")), person("Bernd", "Bischl", , "bernd_bischl@gmx.net", role = "aut"), person("Dirk", "Surmann", , "surmann@statistik.tu-dortmund.de", role = "ctb", comment = c(ORCID = "0000-0003-0873-137X")) ) Description: As a successor 
of the packages 'BatchJobs' and 'BatchExperiments', this package provides a parallel implementation of the Map function for high performance computing systems managed by schedulers 'IBM Spectrum LSF' (), 'Univa Grid Engine'/'Oracle Grid Engine' (), 'Slurm' (), 'TORQUE/PBS' (), or 'Docker Swarm' (). A multicore and socket mode allow the parallelization on a local machine, and multiple machines can be hooked up via SSH to create a makeshift cluster. Moreover, the package provides an abstraction mechanism to define large-scale computer experiments in a well-organized and reproducible way. License: LGPL-3 URL: https://github.com/mlr-org/batchtools, https://batchtools.mlr-org.com BugReports: https://github.com/mlr-org/batchtools/issues Depends: R (>= 3.0.0) Imports: backports (>= 1.1.2), base64url (>= 1.1), brew, checkmate (>= 1.8.5), data.table (>= 1.11.2), digest (>= 0.6.9), fs (>= 1.2.0), parallel, progress (>= 1.1.1), R6, rappdirs, stats, stringi, utils, withr (>= 2.0.0) Suggests: debugme, doMPI, doParallel, e1071, foreach, future, future.batchtools, jsonlite, knitr, parallelMap, ranger, rmarkdown, rpart, snow, testthat, tibble VignetteBuilder: knitr ByteCompile: yes Encoding: UTF-8 NeedsCompilation: yes Roxygen: list(r6 = FALSE) RoxygenNote: 7.3.3 ================================================ FILE: LICENSE ================================================ GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. 
"The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. 
Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. ================================================ FILE: NAMESPACE ================================================ # Generated by roxygen2: do not edit by hand S3method(doJobCollection,JobCollection) S3method(doJobCollection,character) S3method(execJob,Experiment) S3method(execJob,Job) S3method(execJob,JobCollection) S3method(execJob,character) S3method(getJob,ExperimentCollection) S3method(getJob,JobCollection) S3method(getJobPars,ExperimentRegistry) S3method(getJobPars,Registry) S3method(makeJob,ExperimentRegistry) S3method(makeJob,Registry) S3method(makeJobCollection,ExperimentRegistry) S3method(makeJobCollection,Registry) S3method(print,ClusterFunctions) S3method(print,ExperimentRegistry) S3method(print,JobCollection) S3method(print,Registry) S3method(print,RuntimeEstimate) S3method(print,Status) S3method(print,SubmitJobResult) S3method(runHook,JobCollection) S3method(runHook,Registry) export(Worker) export(addAlgorithm) export(addExperiments) export(addJobTags) export(addProblem) export(ajoin) export(assertRegistry) export(batchExport) export(batchMap) export(batchMapResults) export(batchReduce) export(binpack) export(btlapply) export(btmapply) 
export(cfBrewTemplate) export(cfHandleUnknownSubmitError) export(cfKillJob) export(cfReadBrewTemplate) export(chunk) export(clearRegistry) export(doJobCollection) export(estimateRuntimes) export(execJob) export(findConfFile) export(findDone) export(findErrors) export(findExperiments) export(findExpired) export(findJobs) export(findNotDone) export(findNotStarted) export(findNotSubmitted) export(findOnSystem) export(findQueued) export(findRunning) export(findStarted) export(findSubmitted) export(findTagged) export(findTemplateFile) export(flatten) export(getDefaultRegistry) export(getErrorMessages) export(getJobNames) export(getJobPars) export(getJobResources) export(getJobStatus) export(getJobTable) export(getJobTags) export(getLog) export(getStatus) export(getUsedJobTags) export(grepLogs) export(ijoin) export(killJobs) export(ljoin) export(loadRegistry) export(loadResult) export(lpt) export(makeClusterFunctions) export(makeClusterFunctionsDocker) export(makeClusterFunctionsHyperQueue) export(makeClusterFunctionsInteractive) export(makeClusterFunctionsLSF) export(makeClusterFunctionsMulticore) export(makeClusterFunctionsOpenLava) export(makeClusterFunctionsSGE) export(makeClusterFunctionsSSH) export(makeClusterFunctionsSlurm) export(makeClusterFunctionsSocket) export(makeClusterFunctionsTORQUE) export(makeExperimentRegistry) export(makeJob) export(makeJobCollection) export(makeRegistry) export(makeSubmitJobResult) export(ojoin) export(reduceResults) export(reduceResultsDataTable) export(reduceResultsList) export(removeAlgorithms) export(removeExperiments) export(removeJobTags) export(removeProblems) export(removeRegistry) export(resetJobs) export(rjoin) export(runHook) export(runOSCommand) export(saveRegistry) export(setDefaultRegistry) export(setJobNames) export(showLog) export(sjoin) export(submitJobs) export(summarizeExperiments) export(sweepRegistry) export(syncRegistry) export(testJob) export(ujoin) export(unwrap) export(waitForJobs) import(checkmate) 
import(data.table) import(stringi) import(utils) importFrom(R6,R6Class) importFrom(base64url,base32_decode) importFrom(base64url,base32_encode) importFrom(brew,brew) importFrom(digest,digest) importFrom(progress,progress_bar) importFrom(rappdirs,site_config_dir) importFrom(rappdirs,user_config_dir) importFrom(stats,pexp) importFrom(stats,predict) importFrom(stats,runif) importFrom(withr,local_dir) importFrom(withr,local_options) importFrom(withr,with_dir) importFrom(withr,with_seed) useDynLib(batchtools,c_binpack) useDynLib(batchtools,c_lpt) useDynLib(batchtools,count_not_missing) useDynLib(batchtools,fill_gaps) ================================================ FILE: NEWS.md ================================================ # batchtools 0.9.18 * Fixed CRAN issues with documentation # batchtools 0.9.17 * Fixed a bug in the finalizer of `ClusterFunctionsMulticore`. # batchtools 0.9.16 * Fixed a bug in `addExperiments()` in combination with combination method `"bind"` and repls > 1 where experiments have been duplicated. * `addExperiments()` now also accepts a vector of replications (instead of a single scalar value) for argument `repls`. * Improved handling of jobs in `ClusterFunctionsSlurm`. * Fixed a bug in `waitForJobs()` * Fixed some assertions. # batchtools 0.9.15 * Maintenance update. # batchtools 0.9.14 * `batchMap()` now supports unnamed `more.args`. * Exports are now assigned with `delayedAssign()`. * Fix an option in the LSF template. # batchtools 0.9.13 * Maintenance release for R-4.0.0. # batchtools 0.9.12 * Moved `data.table` from `Depends` to `Imports`. User scripts might need to explicitly attach `data.table` via `library()` now. * Fixes for `ClusterFunctionsMulticore`. * Removed a workaround for `system2()` for R-devel (to be released as R-4.0.0). * New configuration option `compress` to select the compression algorithm (passed down to `saveRDS()`). # batchtools 0.9.11 * Removed deprecated function `chunkIds()`. 
* New default for argument `fs.timeout` in the cluster function constructor is `0` (was `NA` before). * Fixed a unit test for OSX. * Improved stability and documentation. * Fixed memory usage calculation. # batchtools 0.9.10 * Exported functions `findConfFile()` and `findTemplateFile()`. * Dropped support for providing a template file directly as string. A valid file is now always required. * Fixed writing to `TMPDIR` instead of the R session's temporary directory. # batchtools 0.9.9 * RDS files are explicitly stored in version 2 to ensure backward compatibility with R versions prior to 3.5.0. * Package `fs` is now used internally for all file system operations. * Support for per-site configuration files and templates to be set up by system administrators. * The print of `getStatus()` now includes a time stamp. * `chunk()` now optionally shuffles the ids before chunking. * Support for setting per-job resources in `submitJobs()`. * Example templates now include resources for `blas.threads` and `omp.threads`. * Some bug fixes regarding read-only registries. # batchtools 0.9.8 * Renamed column "memory" in the status table to "mem.used" to avoid name clashes with the resource specification. * Exported function `assertRegistry()`. * New function `unwrap()` as alias to `flatten()`. The latter causes a name clash with package `purrr` and will be deprecated in a future version. * Registries now contain a unique hash which is updated each time the registry is altered. Can be utilized to invalidate caches, e.g. the cache of knitr. # batchtools 0.9.7 * Added a workaround for a test to be compatible with testthat v2.0.0. * Better and more customizable handling of expired jobs in `waitForJobs()`. * Package `foreach` is now supported for nested parallelization as an alternative to `parallelMap`. * Deprecated argument `flatten` has been removed. * New helper function `flatten()` to manually unnest/unwrap lists in data frames. 
* Removed functions `getProblemIds()` and `getAlgorithmIds()`. Instead, you can just access `reg$problems` or `reg$algorithms`, respectively. * The number of the maximum concurrent jobs can now also be controlled via setting resources. * Internal data base changes to speed up some operations. Old registries are updated on first load by `loadRegistry()`. * Fixed a bug where the sleep mechanism between queries was not working. * Fixed a bug where submit errors on SLURM and TORQUE were not detected as temporary. # batchtools 0.9.6 * Fixed a bug where the wrong problem was retrieved from the cache. This was only triggered for chunked jobs in combination with an `ExperimentRegistry`. # batchtools 0.9.5 * Added a missing routine to upgrade registries created with batchtools prior to v0.9.3. * Fixed a bug where the registry could not be synced if jobs failed during initialization (#135). * The sleep duration for `waitForJobs()` and `submitJobs()` can now be set via the configuration file. * A new heuristic will try to detect if the registry has been altered by a simultaneously running R session. If this is detected, the registry in the current session will be set to a read-only state. * `waitForJobs()` has been reworked to allow control over the heuristic to detect expired jobs. Jobs are treated as expired if they have been submitted but are not detected on the system for `expire.after` iterations (default 3 iterations, before 1 iteration). * New argument `writeable` for `loadRegistry()` to allow loading registries explicitly as read-only. * Removed argument `update.paths` from `loadRegistry()`. Paths are always updated, but the registry on the file system remains unchanged unless loaded in read-write mode. * `ClusterFunctionsSlurm` now come with an experimental nodename argument. If set, all communication with the master is handled via SSH which effectively allows you to submit jobs from your local machine instead of the head node. 
Note that mounting the file system (e.g., via SSHFS) is mandatory. # batchtools 0.9.4 * Fixed handling of `file.dir` with special chars like whitespace. * All backward slashes will now be converted to forward slashes on windows. * Fixed order of arguments in `findExperiments()` (argument `ids` is now first). * Removed code to upgrade registries created with versions prior to v0.9.0 (first CRAN release). * `addExperiments()` now warns if a design is passed as `data.frame` with factor columns and `stringsAsFactors` is `TRUE`. * Added functions `setJobNames()` and `getJobNames()` to control the name of jobs on batch systems. Templates should be adapted to use `job.name` instead of `job.hash` for naming. * Argument `flatten` of `getJobResources()`, `getJobPars()` and `getJobTable()` is deprecated and will be removed. Future versions of the functions will behave like `flatten` is set to `FALSE` explicitly. Single resources/parameters must be extracted manually (or with `tidyr::unnest()`). # batchtools 0.9.3 * Running jobs now are also included while querying for status "started". This affects `findStarted()`, `findNotStarted()` and `getStatus()`. * `findExperiments()` now performs an exact string match (instead of matching substrings) for patterns specified via `prob.name` and `algo.name`. For substring matching, use `prob.pattern` or `algo.pattern`, respectively. * Changed arguments for `reduceResultsDataTable()` * Removed `fill`, now is always `TRUE` * Introduced `flatten` to control if the result should be represented as a column of lists or flattened as separate columns. Defaults to a backward-compatible heuristic, similar to `getJobPars`. * Improved heuristic to lookup template files. Templates shipped with the package can now be used by providing just the file name (w/o extension). * Updated CITATION # batchtools 0.9.2 * Full support for array jobs on Slurm and TORQUE. 
* Array jobs have been disabled for SGE and LSF (due to missing information about the output format) but will be re-enabled in a future release. Note that the variable `n.array.jobs` has been removed from `JobCollection` in favor of the new variable `array.jobs` (logical). * `findExperiments()` now has two additional arguments to match using regular expressions. The possibility to prefix a string with "~" to enable regular expression matching has been removed. * New function `batchReduce()`. * New function `estimateRuntimes()`. * New function `removeRegistry()`. * Missing result files are now handled more consistently, raising an exception by default if the result is not available. The argument `missing.val` has been added to `reduceResultsList()` and `reduceResultsDataTable()` and removed from `loadResult()` and `batchMapResults()`. * Timestamps are now stored with sub-second accuracy. * Renamed Torque to TORQUE. This especially affects the constructor `makeClusterFunctionsTorque` which now must be called via `makeClusterFunctionsTORQUE()`. * `chunkIds()` has been deprecated. Use `chunk()`, `lpt()` or `binpack()` instead. * Fixed listing of jobs for `ClusterFunctionsLSF` and `ClusterFunctionsOpenLava` (thanks to @phaverty). * Job hashes are now prefixed with the literal string 'job' to ensure they start with a letter as required by some SGE systems. * Fixed handling of `NULL` results in `reduceResultsList()`. * Fixed the key lookup heuristic in the join functions. * Fixed a bug where `getJobTable()` returned `difftimes` with the wrong unit (e.g., in minutes instead of seconds). * Deactivated swap allocation for `ClusterFunctionsDocker`. * The package is now more patient while communicating with the scheduler or file system by using a timeout-based approach. This should make the package more reliable and robust under heavy load. # batchtools 0.9.0 Initial CRAN release.
See the vignette for a brief comparison with [BatchJobs](https://cran.r-project.org/package=BatchJobs)/[BatchExperiments](https://cran.r-project.org/package=BatchExperiments).

================================================
FILE: R/Algorithm.R
================================================
#' @title Define Algorithms for Experiments
#'
#' @description
#' Algorithms are functions which get the \code{data} part as well as the problem instance (the return value of the
#' function defined in \code{\link{Problem}}) and return an arbitrary R object.
#'
#' This function serializes all components to the file system and registers the algorithm in the \code{\link{ExperimentRegistry}}.
#'
#' \code{removeAlgorithms} removes all jobs from the registry which depend on the specific algorithm.
#' \code{reg$algorithms} holds the IDs of already defined algorithms.
#'
#' @param name [\code{character(1)}]\cr
#'   Unique identifier for the algorithm.
#' @param fun [\code{function}]\cr
#'   The algorithm function. The static problem part is passed as \dQuote{data}, the generated
#'   problem instance is passed as \dQuote{instance} and the \code{\link{Job}}/\code{\link{Experiment}} as \dQuote{job}.
#'   Therefore, your function must have the formal arguments \dQuote{job}, \dQuote{data} and \dQuote{instance} (or dots \code{...}).
#'
#'   If you do not provide a function, it defaults to a function which just returns the instance.
#' @template expreg
#' @return [\code{Algorithm}]. Object of class \dQuote{Algorithm}.
#' @aliases Algorithm
#' @seealso \code{\link{Problem}}, \code{\link{addExperiments}}
#' @export
addAlgorithm = function(name, fun = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE)
  assertString(name, min.chars = 1L)
  # The name ends up in a file name (see getAlgorithmURI()), so only a
  # conservative set of characters is accepted.
  if (!stri_detect_regex(name, "^[[:alnum:]_.-]+$"))
    stopf("Illegal characters in algorithm name: %s", name)

  if (is.null(fun)) {
    # default algorithm: pass the problem instance through unchanged
    fun = function(job, data, instance, ...) instance
  } else {
    # either the three formals are present, or the function accepts dots
    assert(checkFunction(fun, args = c("job", "data", "instance")), checkFunction(fun, args = "..."))
  }

  info("Adding algorithm '%s'", name)
  algo = setClasses(list(fun = fun, name = name), "Algorithm")
  writeRDS(algo, file = getAlgorithmURI(reg, name), compress = reg$compress)
  # union() keeps the id unique if an algorithm is re-added under the same name
  reg$algorithms = union(reg$algorithms, name)
  saveRegistry(reg)
  invisible(algo)
}

#' @export
#' @rdname addAlgorithm
removeAlgorithms = function(name, reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE, running.ok = FALSE)
  assertCharacter(name, any.missing = FALSE)
  assertSubset(name, reg$algorithms)

  # local binding for the column name used in the data.table expression below
  # (presumably to avoid an R CMD check note about an undefined global)
  algorithm = NULL
  for (nn in name) {
    # job definitions using this algorithm, and the jobs built on them
    def.ids = reg$defs[algorithm == nn, "def.id"]
    job.ids = filter(def.ids, reg$status, "job.id")

    info("Removing Algorithm '%s' and %i corresponding jobs ...", nn, nrow(job.ids))
    file_remove(getAlgorithmURI(reg, nn))

    # anti-joins: drop the affected rows from both tables
    reg$defs = reg$defs[!def.ids]
    reg$status = reg$status[!job.ids]
    reg$algorithms = chsetdiff(reg$algorithms, nn)
  }

  sweepRegistry(reg)
  invisible(TRUE)
}

# Path of the file an algorithm is stored in: <registry dir>/algorithms/<mangled name>
getAlgorithmURI = function(reg, name) {
  fs::path(dir(reg, "algorithms"), mangle(name))
}

================================================
FILE: R/ExperimentRegistry.R
================================================
#' @title ExperimentRegistry Constructor
#'
#' @description
#' \code{makeExperimentRegistry} constructs a special \code{\link{Registry}} which
#' is suitable for the definition of large scale computer experiments.
#'
#' Each experiment consists of a \code{\link{Problem}} and an \code{\link{Algorithm}}.
#' These can be parametrized with \code{\link{addExperiments}} to actually define computational
#' jobs.
#'
#' @inheritParams makeRegistry
#' @aliases ExperimentRegistry
#' @return [\code{ExperimentRegistry}].
#' @export #' @family Registry Experiment #' @examples #' \dontshow{ batchtools:::example_push_temp(1) } #' tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) #' #' # Define one problem, two algorithms and add them with some parameters: #' addProblem(reg = tmp, "p1", #' fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd)) #' addAlgorithm(reg = tmp, "a1", fun = function(job, data, instance, ...) mean(instance)) #' addAlgorithm(reg = tmp, "a2", fun = function(job, data, instance, ...) median(instance)) #' ids = addExperiments(reg = tmp, list(p1 = data.table::CJ(n = c(50, 100), mean = -2:2, sd = 1:4))) #' #' # Overview of defined experiments: #' tmp$problems #' tmp$algorithms #' summarizeExperiments(reg = tmp) #' summarizeExperiments(reg = tmp, by = c("problem", "algorithm", "n")) #' ids = findExperiments(prob.pars = (n == 50), reg = tmp) #' print(unwrap(getJobPars(ids, reg = tmp))) #' #' # Submit jobs #' submitJobs(reg = tmp) #' waitForJobs(reg = tmp) #' #' # Reduce the results of algorithm a1 #' ids.mean = findExperiments(algo.name = "a1", reg = tmp) #' reduceResults(ids.mean, fun = function(aggr, res, ...)
c(aggr, res), reg = tmp) #' #' # Join info table with all results and calculate mean of results #' # grouped by n and algorithm #' ids = findDone(reg = tmp) #' pars = unwrap(getJobPars(ids, reg = tmp)) #' results = unwrap(reduceResultsDataTable(ids, fun = function(res) list(res = res), reg = tmp)) #' tab = ljoin(pars, results) #' tab[, list(mres = mean(res)), by = c("n", "algorithm")] makeExperimentRegistry = function(file.dir = "registry", work.dir = getwd(), conf.file = findConfFile(), packages = character(0L), namespaces = character(0L), source = character(0L), load = character(0L), seed = NULL, make.default = TRUE) { reg = makeRegistry(file.dir = file.dir, work.dir = work.dir, conf.file = conf.file, packages = packages, namespaces = namespaces, source = source, load = load, seed = seed, make.default = make.default) fs::dir_create(fs::path(reg$file.dir, c("problems", "algorithms"))) reg$problems = character(0L) reg$algorithms = character(0L) reg$status$repl = integer(0L) reg$defs$problem = character(0L) reg$defs$algorithm = character(0L) reg$defs$job.pars = NULL reg$defs$prob.pars = list() reg$defs$algo.pars = list() reg$defs$pars.hash = character(0L) class(reg) = c("ExperimentRegistry", "Registry") saveRegistry(reg) return(reg) } #' @export print.ExperimentRegistry = function(x, ...) { cat("Experiment Registry\n") catf(" Backend : %s", x$cluster.functions$name) catf(" File dir : %s", x$file.dir) catf(" Work dir : %s", x$work.dir) catf(" Jobs : %i", nrow(x$status)) catf(" Problems : %i", length(x$problems)) catf(" Algorithms: %i", length(x$algorithms)) catf(" Seed : %i", x$seed) catf(" Writeable : %s", x$writeable) } ================================================ FILE: R/Export.R ================================================ #' @title Export Objects to the Slaves #' #' @description #' Objects are saved in subdirectory \dQuote{exports} of the #' \dQuote{file.dir} of \code{reg}. 
#' They are automatically loaded and placed in the global environment
#' each time the registry is loaded or a job collection is executed.
#'
#' @param export [\code{list}]\cr
#'  Named list of objects to export.
#' @param unexport [\code{character}]\cr
#'  Vector of object names to unexport.
#' @template reg
#' @return [\code{data.table}] with name and uri to the exported objects.
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#'
#' # list exports
#' exports = batchExport(reg = tmp)
#' print(exports)
#'
#' # add a job and required exports
#' batchMap(function(x) x^2 + y + z, x = 1:3, reg = tmp)
#' exports = batchExport(export = list(y = 99, z = 1), reg = tmp)
#' print(exports)
#'
#' submitJobs(reg = tmp)
#' waitForJobs(reg = tmp)
#' stopifnot(loadResult(1, reg = tmp) == 101)
#'
#' # Un-export z
#' exports = batchExport(unexport = "z", reg = tmp)
#' print(exports)
batchExport = function(export = list(), unexport = character(0L), reg = getDefaultRegistry()) {
  # Write access is only demanded when the set of exports is going to change;
  # merely listing the exports works on a read-only registry.
  changes.exports = length(export) > 0L || length(unexport) > 0L
  assertRegistry(reg, writeable = changes.exports)
  assertList(export, names = "named")
  assertCharacter(unexport, any.missing = FALSE, min.chars = 1L)

  export.dir = fs::path(reg$file.dir, "exports")

  # Step 1: store new objects, one file per object
  if (length(export) > 0L) {
    obj.names = names(export)
    obj.files = fs::path(export.dir, mangle(obj.names))
    present = fs::file_exists(obj.files)

    if (!all(present))
      info("Exporting new objects: '%s' ...", stri_flatten(obj.names[!present], "','"))
    if (any(present))
      info("Overwriting previously exported object: '%s'", stri_flatten(obj.names[present], "','"))

    Map(writeRDS, object = export, file = obj.files, compress = reg$compress)
  }

  # Step 2: remove objects which should no longer be exported;
  # names without a file on disk are ignored
  if (length(unexport) > 0L) {
    obj.files = fs::path(export.dir, mangle(unexport))
    present = fs::file_exists(obj.files)

    if (any(present))
      info("Un-exporting exported objects: '%s' ...", stri_flatten(unexport[present], "','"))

    file_remove(obj.files[present])
  }

  # Step 3: report whatever is exported now
  remaining = list.files(export.dir, pattern = "\\.rds")
  invisible(data.table(name = unmangle(remaining), uri = fs::path(export.dir, remaining)))
}
================================================ FILE: R/Hooks.R ================================================ #' @title Trigger Evaluation of Custom Function #' #' @description #' Hooks allow triggering function calls on specific events. #' They can be specified via the \code{\link{ClusterFunctions}} and are triggered on the following events: #' \describe{ #' \item{\code{pre.sync}}{\code{function(reg, fns, ...)}: Run before synchronizing the registry on the master. \code{fns} is the character vector of paths to the update files.} #' \item{\code{post.sync}}{\code{function(reg, updates, ...)}: Run after synchronizing the registry on the master. \code{updates} is the data.table of processed updates.} #' \item{\code{pre.submit.job}}{\code{function(reg, ...)}: Run before a job is submitted to the scheduler on the master.} #' \item{\code{post.submit.job}}{\code{function(reg, ...)}: Run after a job is successfully submitted to the scheduler on the master.} #' \item{\code{pre.submit}}{\code{function(reg, ...)}: Run before any job is submitted to the scheduler.} #' \item{\code{post.submit}}{\code{function(reg, ...)}: Run after jobs are submitted to the scheduler.} #' \item{\code{pre.do.collection}}{\code{function(reg, reader, ...)}: Run before starting the job collection on the slave. #' \code{reader} is an internal cache object.} #' \item{\code{post.do.collection}}{\code{function(reg, updates, reader, ...)}: Run after all jobs in the chunk are terminated on the slave. #' \code{updates} is a \code{\link[data.table]{data.table}} of updates which will be merged with the \code{\link{Registry}} by the master. #' \code{reader} is an internal cache object.} #' \item{\code{pre.kill}}{\code{function(reg, ids, ...)}: Run before any job is killed.} #' \item{\code{post.kill}}{\code{function(reg, ids, ...)}: Run after jobs are killed.
\code{ids} is the return value of \code{\link{killJobs}}.} #' } #' #' @param obj [\link{Registry} | \link{JobCollection}]\cr #' Registry which contains the \link{ClusterFunctions} with element \dQuote{hooks} #' or a \link{JobCollection} which holds the subset of functions which are executed #' remotely. #' @param hook [\code{character(1)}]\cr #' ID of the hook as string. #' @param ... [ANY]\cr #' Additional arguments passed to the function referenced by \code{hook}. #' See description. #' @return Return value of the called function, or \code{NULL} if there is no hook #' with the specified ID. #' @aliases Hooks Hook #' @export runHook = function(obj, hook, ...) { UseMethod("runHook") } #' @export runHook.Registry = function(obj, hook, ...) { f = obj$cluster.functions$hooks[[hook]] if (is.null(f)) return(NULL) "!DEBUG [runHook]: Running hook '`hook`'" f(obj, ...) } #' @export runHook.JobCollection = function(obj, hook, ...) { f = obj$hooks[[hook]] if (is.null(f)) return(NULL) "!DEBUG [runHook]: Running hook '`hook`'" f(obj, ...) } ================================================ FILE: R/Job.R ================================================ BaseJob = R6Class("BaseJob", cloneable = FALSE, public = list( file.dir = NULL, id = NULL, seed = NULL, resources = NULL, reader = NULL, initialize = function(file.dir, reader, id, seed, resources) { self$file.dir = file.dir self$reader = reader self$id = id self$seed = seed self$resources = resources } ), active = list( job.id = function() { # alias for id. This is confusing not to have. 
self$id }, external.dir = function() { fs::dir_create(fs::path(self$file.dir, "external", self$id)) } ) ) Job = R6Class("Job", cloneable = FALSE, inherit = BaseJob, public = list( job.pars = NULL, initialize = function(file.dir, reader, id, job.pars, seed, resources) { self$job.pars = job.pars super$initialize(file.dir, reader, id, seed, resources) } ), active = list( fun = function() { self$reader$get(fs::path(self$file.dir, "user.function.rds")) }, pars = function() { c(self$job.pars, self$reader$get(fs::path(self$file.dir, "more.args.rds"))) } ) ) Experiment = R6Class("Experiment", cloneable = FALSE, inherit = BaseJob, public = list( repl = NA_integer_, prob.name = NULL, algo.name = NULL, prob.pars = NULL, algo.pars = NULL, compress = NULL, allow.access.to.instance = TRUE, initialize = function(file.dir, reader, id, prob.pars, algo.pars, repl, seed, resources, prob.name, algo.name, compress = "gzip") { super$initialize(file.dir, reader, id,seed, resources) self$repl = repl self$prob.name = as.character(prob.name) self$prob.pars = prob.pars self$algo.name = as.character(algo.name) self$algo.pars = algo.pars self$compress = compress } ), active = list( problem = function() { self$reader$get(getProblemURI(self, self$prob.name), slot = "..problem..") }, algorithm = function() { self$reader$get(getAlgorithmURI(self, self$algo.name)) }, pars = function() { list(prob.pars = self$prob.pars, algo.pars = self$algo.pars) }, instance = function() { if (!self$allow.access.to.instance) stop("You cannot access 'job$instance' in the problem generation or algorithm function") p = self$problem if (p$cache) { cache.file = getProblemCacheURI(self) if (fs::file_exists(cache.file)) { result = try(readRDS(cache.file)) if (!inherits(result, "try-error")) return(result) } } seed = if (is.null(p$seed)) self$seed else getSeed(p$seed, self$repl - 1L) wrapper = function(...) p$fun(job = self, data = p$data, ...) 
result = with_seed(seed, do.call(wrapper, self$prob.pars, envir = .GlobalEnv)) if (p$cache) writeRDS(result, file = cache.file, compress = self$compress) return(result) } ) ) #' @title Jobs and Experiments #' #' @description #' Jobs and Experiments are abstract objects which hold all information necessary to execute a single computational #' job for a \code{\link{Registry}} or \code{\link{ExperimentRegistry}}, respectively. #' #' They can be created using the constructor \code{makeJob} which takes a single job id. #' Jobs and Experiments are passed to reduce functions like \code{\link{reduceResults}}. #' Furthermore, Experiments can be used in the functions of the \code{\link{Problem}} and \code{\link{Algorithm}}. #' Jobs and Experiments hold these information: #' \describe{ #' \item{\code{job.id}}{Job ID as integer.} #' \item{\code{pars}}{ #' Job parameters as named list. #' For \code{\link{ExperimentRegistry}}, the parameters are divided into the sublists \dQuote{prob.pars} and \dQuote{algo.pars}. #' } #' \item{\code{seed}}{Seed which is set via \code{\link{doJobCollection}} as scalar integer.} #' \item{\code{resources}}{Computational resources which were set for this job as named list.} #' \item{\code{external.dir}}{ #' Path to a directory which is created exclusively for this job. You can store external files here. #' Directory is persistent between multiple restarts of the job and can be cleaned by calling \code{\link{resetJobs}}. 
#' } #' \item{\code{fun}}{Job only: User function passed to \code{\link{batchMap}}.} #' \item{\code{prob.name}}{Experiments only: Problem id.} #' \item{\code{algo.name}}{Experiments only: Algorithm id.} #' \item{\code{problem}}{Experiments only: \code{\link{Problem}}.} #' \item{\code{instance}}{Experiments only: Problem instance.} #' \item{\code{algorithm}}{Experiments only: \code{\link{Algorithm}}.} #' \item{\code{repl}}{Experiments only: Replication number.} #' } #' #' Note that the slots \dQuote{pars}, \dQuote{fun}, \dQuote{algorithm} and \dQuote{problem} #' lazy-load required files from the file system and construct the object on the first access. #' The realizations are cached for all slots except \dQuote{instance} (which might be stochastic). #' #' Jobs and Experiments can be executed manually with \code{\link{execJob}}. #' #' @template id #' @param reader [\code{RDSReader} | \code{NULL}]\cr #' Reader object to retrieve files. Used internally to cache reading from the file system. #' The default (\code{NULL}) does not make use of caching. #' @template reg #' @return [\code{Job} | \code{Experiment}]. 
#' @aliases Job Experiment #' @rdname JobExperiment #' @export #' @examples #' \dontshow{ batchtools:::example_push_temp(1) } #' tmp = makeRegistry(file.dir = NA, make.default = FALSE) #' batchMap(function(x, y) x + y, x = 1:2, more.args = list(y = 99), reg = tmp) #' submitJobs(resources = list(foo = "bar"), reg = tmp) #' job = makeJob(1, reg = tmp) #' print(job) #' #' # Get the parameters: #' job$pars #' #' # Get the job resources: #' job$resources #' #' # Execute the job locally: #' execJob(job) makeJob = function(id, reader = NULL, reg = getDefaultRegistry()) { UseMethod("makeJob", object = reg) } #' @export makeJob.Registry = function(id, reader = NULL, reg = getDefaultRegistry()) { row = mergedJobs(reg, convertId(reg, id), c("job.id", "job.pars", "resource.id")) resources = reg$resources[row, "resources", on = "resource.id", nomatch = NA]$resources[[1L]] %??% list() Job$new(file.dir = reg$file.dir, reader %??% RDSReader$new(FALSE), id = row$job.id, job.pars = row$job.pars[[1L]], seed = getSeed(reg$seed, row$job.id), resources = resources) } #' @export makeJob.ExperimentRegistry = function(id, reader = NULL, reg = getDefaultRegistry()) { row = mergedJobs(reg, convertId(reg, id), c("job.id", "problem", "prob.pars", "algorithm", "algo.pars", "repl", "resource.id")) resources = reg$resources[row, "resources", on = "resource.id", nomatch = NA]$resources[[1L]] %??% list() Experiment$new(file.dir = reg$file.dir, reader %??% RDSReader$new(FALSE), id = row$job.id, prob.pars = row$prob.pars[[1L]], algo.pars = row$algo.pars[[1L]], seed = getSeed(reg$seed, row$job.id), repl = row$repl, resources = resources, prob.name = row$problem, algo.name = row$algorithm) } getJob = function(jc, i, reader = NULL) { UseMethod("getJob") } #' @export getJob.JobCollection = function(jc, i, reader = RDSReader$new(FALSE)) { row = jc$jobs[i] Job$new(file.dir = jc$file.dir, reader = reader, id = row$job.id, job.pars = row$job.pars[[1L]], seed = getSeed(jc$seed, row$job.id), resources = 
jc$resources) } #' @export getJob.ExperimentCollection = function(jc, i, reader = RDSReader$new(FALSE)) { row = jc$jobs[i] Experiment$new(file.dir = jc$file.dir, reader = reader, id = row$job.id, prob.pars = row$prob.pars[[1L]], algo.pars = row$algo.pars[[1L]], seed = getSeed(jc$seed, row$job.id), repl = row$repl, resources = jc$resources, prob.name = row$problem, algo.name = row$algorithm, compress = jc$compress) } ================================================ FILE: R/JobCollection.R ================================================ #' @title JobCollection Constructor #' #' @description #' \code{makeJobCollection} takes multiple job ids and creates an object of class \dQuote{JobCollection} which holds all #' necessary information for the calculation with \code{\link{doJobCollection}}. It is implemented as an environment #' with the following variables: #' \describe{ #' \item{file.dir}{\code{file.dir} of the \link{Registry}.} #' \item{work.dir:}{\code{work.dir} of the \link{Registry}.} #' \item{job.hash}{Unique identifier of the job. Used to create names on the file system.} #' \item{jobs}{\code{\link[data.table]{data.table}} holding individual job information. 
See examples.} #' \item{log.file}{Location of the designated log file for this job.} #' \item{resources:}{Named list of specified computational resources.} #' \item{uri}{Location of the job description file (saved with \code{\link[base]{saveRDS}}) on the file system.} #' \item{seed}{\code{integer(1)} Seed of the \link{Registry}.} #' \item{packages}{\code{character} with required packages to load via \code{\link[base]{require}}.} #' \item{namespaces}{\code{character} with required packages to load via \code{\link[base]{requireNamespace}}.} #' \item{source}{\code{character} with list of files to source before execution.} #' \item{load}{\code{character} with list of files to load before execution.} #' \item{array.var}{\code{character(1)} of the array environment variable specified by the cluster functions.} #' \item{array.jobs}{\code{logical(1)} signaling if jobs were submitted using \code{chunks.as.arrayjobs}.} #' } #' If your \link{ClusterFunctions} uses a template, \code{\link[brew]{brew}} will be executed in the environment of such #' a collection. Thus all variables available inside the job can be used in the template. #' #' @templateVar ids.default all #' @template ids #' @param resources [\code{list}]\cr #' Named list of resources. Default is \code{list()}. #' @template reg #' @return [\code{JobCollection}].
#' @family JobCollection
#' @aliases JobCollection
#' @rdname JobCollection
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE, packages = "methods")
#' batchMap(identity, 1:5, reg = tmp)
#'
#' # resources are usually set in submitJobs()
#' jc = makeJobCollection(1:3, resources = list(foo = "bar"), reg = tmp)
#' ls(jc)
#' jc$resources
makeJobCollection = function(ids = NULL, resources = list(), reg = getDefaultRegistry()) {
  # S3 generic, dispatching on the class of the registry (not on the first argument)
  UseMethod("makeJobCollection", reg)
}

# Shared constructor used by both makeJobCollection methods.
# Creates an environment holding the job table plus the registry settings
# copied below; the caller sets the class.
createCollection = function(jobs, resources = list(), reg = getDefaultRegistry()) {
  jc = new.env(parent = emptyenv())

  jc$jobs = setkeyv(jobs, "job.id")
  jc$job.hash = rnd_hash("job")
  # fall back to the hash as soon as a single job of the collection has no name,
  # otherwise the name of the first job is used
  jc$job.name = if (anyMissing(jobs$job.name)) jc$job.hash else jobs$job.name[1L]
  jc$file.dir = reg$file.dir
  jc$work.dir = reg$work.dir
  jc$seed = reg$seed
  jc$uri = getJobFiles(reg, hash = jc$job.hash)
  jc$log.file = fs::path(reg$file.dir, "logs", sprintf("%s.log", jc$job.hash))
  jc$packages = reg$packages
  jc$namespaces = reg$namespaces
  jc$source = reg$source
  jc$load = reg$load
  jc$resources = resources
  jc$array.var = reg$cluster.functions$array.var
  # isTRUE() also covers the case that the resource is not set at all
  jc$array.jobs = isTRUE(resources$chunks.as.arrayjobs)
  jc$compress = reg$compress

  # only the hooks listed in batchtools$hooks$remote are copied into the collection
  hooks = chintersect(names(reg$cluster.functions$hooks), batchtools$hooks$remote)
  if (length(hooks) > 0L)
    jc$hooks = reg$cluster.functions$hooks[hooks]

  return(jc)
}

#' @export
makeJobCollection.Registry = function(ids = NULL, resources = list(), reg = getDefaultRegistry()) {
  jc = createCollection(mergedJobs(reg, convertIds(reg, ids), c("job.id", "job.name", "job.pars")), resources, reg)
  setClasses(jc, "JobCollection")
}

#' @export
makeJobCollection.ExperimentRegistry = function(ids = NULL, resources = list(), reg = getDefaultRegistry()) {
  # same as the Registry method, but with the experiment-specific columns
  jc = createCollection(mergedJobs(reg, convertIds(reg, ids), c("job.id", "job.name", "problem", "algorithm", "prob.pars", "algo.pars", "repl")), resources, reg)
  setClasses(jc, c("ExperimentCollection", "JobCollection"))
}

#' @export
print.JobCollection = function(x, ...) {
  catf("Collection of %i jobs", nrow(x$jobs))
  catf("  Hash    : %s", x$job.hash)
  catf("  Log file: %s", x$log.file)
}

================================================
FILE: R/JobNames.R
================================================
#' @title Set and Retrieve Job Names
#' @name JobNames
#'
#' @description
#' Set custom names for jobs. These are passed to the template as \sQuote{job.name}.
#' If no custom name is set (or any of the job names of the chunk is missing),
#' the job hash is used as job name.
#' Individual job names can be accessed via \code{jobs$job.name}.
#'
#' @templateVar ids.default all
#' @template ids
#' @param names [\code{character}]\cr
#'  Character vector of the same length as provided ids.
#' @template reg
#' @return \code{setJobNames} returns \code{NULL} invisibly, \code{getJobNames}
#'  returns a \code{data.table} with columns \code{job.id} and \code{job.name}.
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' ids = batchMap(identity, 1:10, reg = tmp)
#' setJobNames(ids, letters[1:nrow(ids)], reg = tmp)
#' getJobNames(reg = tmp)
setJobNames = function(ids = NULL, names, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE)
  # NOTE(review): the default here is noIds(), while getJobNames() uses allIds(reg)
  # and the roxygen template variable says "all" -- confirm which one is intended
  ids = convertIds(reg, ids, default = noIds())
  assertCharacter(names, min.chars = 1L, len = nrow(ids))

  # update by reference, then persist the registry
  reg$status[ids, "job.name" := names]
  saveRegistry(reg)
  invisible(NULL)
}

#' @export
#' @rdname JobNames
getJobNames = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg)
  ids = convertIds(reg, ids, default = allIds(reg))
  reg$status[ids, c("job.id", "job.name")]
}

================================================
FILE: R/JobTables.R
================================================
#' @title Query Job Information
#'
#' @description
#' \code{getJobStatus} returns the internal table which stores information about the
computational #' status of jobs, \code{getJobPars} a table with the job parameters, \code{getJobResources} a table #' with the resources which were set to submit the jobs, and \code{getJobTags} the tags of the jobs #' (see \link{Tags}). #' #' \code{getJobTable} returns all these tables joined. #' #' @templateVar ids.default all #' @template ids #' @template reg #' @return [\code{\link[data.table]{data.table}}] with the following columns (not necessarily in this order): #' \describe{ #' \item{job.id}{Unique Job ID as integer.} #' \item{submitted}{Time the job was submitted to the batch system as \code{\link[base]{POSIXct}}.} #' \item{started}{Time the job was started on the batch system as \code{\link[base]{POSIXct}}.} #' \item{done}{Time the job terminated (successfully or with an error) as \code{\link[base]{POSIXct}}.} #' \item{error}{Either \code{NA} if the job terminated successfully or the error message.} #' \item{mem.used}{Estimate of the memory usage.} #' \item{batch.id}{Batch ID as reported by the scheduler.} #' \item{log.file}{Log file. 
If missing, defaults to \code{[job.hash].log}.} #' \item{job.hash}{Unique string identifying the job or chunk.} #' \item{time.queued}{Time in seconds (as \code{\link[base]{difftime}}) the job was queued.} #' \item{time.running}{Time in seconds (as \code{\link[base]{difftime}}) the job was running.} #' \item{pars}{List of parameters/arguments for this job.} #' \item{resources}{List of computational resources set for this job.} #' \item{tags}{Tags as joined string, delimited by \dQuote{,}.} #' \item{problem}{Only for \code{\link{ExperimentRegistry}}: the problem identifier.} #' \item{algorithm}{Only for \code{\link{ExperimentRegistry}}: the algorithm identifier.} #' } #' @export #' @examples #' \dontshow{ batchtools:::example_push_temp(1) } #' tmp = makeRegistry(file.dir = NA, make.default = FALSE) #' f = function(x) if (x < 0) stop("x must be > 0") else sqrt(x) #' batchMap(f, x = c(-1, 0, 1), reg = tmp) #' submitJobs(reg = tmp) #' waitForJobs(reg = tmp) #' addJobTags(1:2, "tag1", reg = tmp) #' addJobTags(2, "tag2", reg = tmp) #' #' # Complete table: #' getJobTable(reg = tmp) #' #' # Job parameters: #' getJobPars(reg = tmp) #' #' # Set and retrieve tags: #' getJobTags(reg = tmp) #' #' # Job parameters with tags right-joined: #' rjoin(getJobPars(reg = tmp), getJobTags(reg = tmp)) getJobTable = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg) ids = convertIds(reg, ids) getJobStatus(ids, reg = reg)[getJobPars(ids, reg = reg)][getJobResources(ids = ids, reg = reg)][getJobTags(ids = ids, reg = reg)] } #' @export #' @rdname getJobTable getJobStatus = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) submitted = started = done = NULL cols = chsetdiff(names(reg$status), c("def.id", "resource.id")) tab = filter(reg$status, convertIds(reg, ids), cols) tab[, "submitted" := as.POSIXct(submitted, origin = "1970-01-01")] tab[, "started" := as.POSIXct(started, origin = "1970-01-01")] tab[, "done" := as.POSIXct(done, origin = 
"1970-01-01")] tab[, "time.queued" := difftime(started, submitted, units = "secs")] tab[, "time.running" := difftime(done, started, units = "secs")] tab[] } #' @export #' @rdname getJobTable getJobResources = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg) ids = convertIds(reg, ids) tab = merge(filter(reg$status, ids, c("job.id", "resource.id")), reg$resources, all.x = TRUE, by = "resource.id")[, c("job.id", "resources")] setkeyv(tab, "job.id")[] } #' @export #' @rdname getJobTable getJobPars = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg) UseMethod("getJobPars", object = reg) } #' @export getJobPars.Registry = function(ids = NULL, reg = getDefaultRegistry()) { ids = convertIds(reg, ids) tab = mergedJobs(reg, ids, c("job.id", "job.pars")) setkeyv(tab, "job.id")[] } #' @export getJobPars.ExperimentRegistry = function(ids = NULL, reg = getDefaultRegistry()) { ids = convertIds(reg, ids) tab = mergedJobs(reg, ids, c("job.id", "problem", "prob.pars", "algorithm", "algo.pars")) setkeyv(tab, "job.id")[] } #' @export #' @rdname getJobTable getJobTags = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg) ids = convertIds(reg, ids, default = allIds(reg)) tag = NULL reg$tags[ids, on = "job.id"][, list(tags = stri_flatten(sort(tag, na.last = TRUE), ",")), by = "job.id"] } ================================================ FILE: R/Joins.R ================================================ #' @title Inner, Left, Right, Outer, Semi and Anti Join for Data Tables #' @name JoinTables #' #' @description #' These helper functions perform join operations on data tables. #' Most of them are basically one-liners. #' See \url{https://rpubs.com/ronasta/join_data_tables} for a overview of join operations in #' data table or alternatively \pkg{dplyr}'s vignette on two table verbs. #' #' @param x [\code{\link{data.frame}}]\cr #' First data.frame to join. #' @param y [\code{\link{data.frame}}]\cr #' Second data.frame to join. 
#' @param by [\code{character}]\cr
#'   Column name(s) of variables used to match rows in \code{x} and \code{y}.
#'   If not provided, a heuristic similar to the one described in the \pkg{dplyr} vignette is used:
#'   \enumerate{
#'     \item If \code{x} is keyed, the existing key will be used if \code{y} has the same column(s).
#'     \item If \code{x} is not keyed, the intersect of common columns names is used if not empty.
#'     \item Raise an exception.
#'   }
#'   You may pass a named character vector to merge on columns with different names in \code{x} and
#'   \code{y}: \code{by = c("x.id" = "y.id")} will match \code{x}'s \dQuote{x.id} column with \code{y}\'s
#'   \dQuote{y.id} column.
#' @return [\code{\link[data.table]{data.table}}] with key identical to \code{by}.
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' # Create two tables for demonstration
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(identity, x = 1:6, reg = tmp)
#' x = getJobPars(reg = tmp)
#' y = findJobs(x >= 2 & x <= 5, reg = tmp)
#' y$extra.col = head(letters, nrow(y))
#'
#' # Inner join: similar to intersect(): keep all columns of x and y with common matches
#' ijoin(x, y)
#'
#' # Left join: use all ids from x, keep all columns of x and y
#' ljoin(x, y)
#'
#' # Right join: use all ids from y, keep all columns of x and y
#' rjoin(x, y)
#'
#' # Outer join: similar to union(): keep all columns of x and y with matches in x or y
#' ojoin(x, y)
#'
#' # Semi join: filter x with matches in y
#' sjoin(x, y)
#'
#' # Anti join: filter x with matches not in y
#' ajoin(x, y)
#'
#' # Updating join: Replace values in x with values in y
#' ujoin(x, y)
ijoin = function(x, y, by = NULL) {
  x = as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)
  # nomatch = 0L drops rows of y without a partner in x
  setKey(x[y, nomatch = 0L, on = by], by)
}

#' @rdname JoinTables
#' @export
ljoin = function(x, y, by = NULL) {
  x = as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)

  if (is.null(names(by)))
    return(setKey(y[x, on = by], by))

  # A named `by` maps x's columns (names) to y's columns (values).
  # Here y is the outer table of the join, and data.table reads the names of `on`
  # as columns of the outer table, so the mapping has to be swapped.
  swapped = structure(names(by), names = unname(by))
  res = y[x, on = swapped]
  # The join columns of the result carry y's names; rename them to x's names so that
  # the result can be keyed the same way as the results of the other joins.
  setnames(res, unname(by), names(by))
  setKey(res, by)
}

#' @rdname JoinTables
#' @export
rjoin = function(x, y, by = NULL) {
  x = as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)
  setKey(x[y, on = by], by)
}

#' @rdname JoinTables
#' @export
ojoin = function(x, y, by = NULL) {
  x = as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)

  # merge() needs by.x/by.y if the column names differ between x and y
  res = if (is.null(names(by)))
    merge(x, y, all = TRUE, by = by)
  else
    merge(x, y, all = TRUE, by.x = names(by), by.y = by)
  setKey(res, by)
}

#' @rdname JoinTables
#' @export
sjoin = function(x, y, by = NULL) {
  x = as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)
  # which = TRUE yields row numbers of x; unique() keeps each matching row once
  w = unique(x[y, on = by, nomatch = 0L, which = TRUE, allow.cartesian = TRUE])
  setKey(x[w], by)
}

#' @rdname JoinTables
#' @export
ajoin = function(x, y, by = NULL) {
  x = as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)
  setKey(x[!y, on = by], by)
}

#' @rdname JoinTables
#' @param all.y [logical(1)]\cr
#'   Keep columns of \code{y} which are not in \code{x}?
#' @export
ujoin = function(x, y, all.y = FALSE, by = NULL) {
  assertFlag(all.y)
  # x is updated by reference below, thus work on a copy of the input
  x = if (is.data.table(x)) copy(x) else as.data.table(x)
  y = as.data.table(y)
  by = guessBy(x, y, by)

  # columns of y to write into x: all non-join columns, optionally only those already in x
  cn = chsetdiff(names(y), by)
  if (!all.y)
    cn = chintersect(names(x), cn)
  if (length(cn) == 0L)
    return(x)

  # builds the call `:=`(col1 = i.col1, col2 = i.col2, ...) as text;
  # the column names are inserted unquoted
  expr = parse(text = stri_join("`:=`(", stri_flatten(sprintf("%1$s=i.%1$s", cn), ","), ")"))
  setKey(x[y, eval(expr), on = by], by)
}

# Determines the columns to join on.
# If `by` is NULL, uses the key of x if all key columns are in y, otherwise the common
# column names of x and y, and raises an error if there are none.
# If `by` is given, it is checked against the column names of x and y and returned unchanged.
guessBy = function(x, y, by = NULL) {
  assertDataFrame(x, min.cols = 1L)
  assertDataFrame(y, min.cols = 1L)

  if (is.null(by)) {
    res = key(x)
    if (!is.null(res) && all(res %chin% names(y)))
      return(res)

    res = chintersect(names(x), names(y))
    if (length(res) > 0L)
      return(res)
    stop("Unable to guess columns to match on. Please specify them explicitly or set keys beforehand.")
  } else {
    # for a named `by`, the names refer to x and the values refer to y
    if (is.null(names(by))) {
      assertSubset(by, names(x))
    } else {
      assertSubset(names(by), names(x))
    }
    assertSubset(by, names(y))
    return(by)
  }
}

# Keys `res` on the join columns (x's names if `by` is named) unless it is already keyed that way.
setKey = function(res, by) {
  by = names(by) %??% unname(by)
  if (!identical(key(res), by))
    setkeyv(res, by)
  res[]
}

================================================
FILE: R/Logs.R
================================================
#' @useDynLib batchtools fill_gaps
# Reads the log file of the job `id` into a table with columns "job.id" and "lines".
# If the log file is not available, either returns an empty table (missing.as.empty = TRUE)
# or raises an error.
readLog = function(id, missing.as.empty = FALSE, reg = getDefaultRegistry()) {
  log.file = getLogFiles(reg, id)
  if (is.na(log.file) || !waitForFile(log.file, timeout = reg$cluster.functions$fs.latency, must.work = FALSE)) {
    if (missing.as.empty)
      return(data.table(job.id = integer(0L), lines = character(0L)))
    stopf("Log file '%s' for job with id %i not available", log.file, id$job.id)
  }

  lines = readLines(log.file)
  if (length(lines) > 0L) {
    # lines ending with the marker "[batchtools job.id=<n>]" yield the job id, all others NA;
    # the vector is then passed through the compiled routine fill_gaps
    job.id = as.integer(stri_match_last_regex(lines, c("\\[batchtools job\\.id=([0-9]+)\\]$"))[, 2L])
    job.id = .Call(fill_gaps, job.id)
  } else {
    job.id = integer(0L)
  }

  setkeyv(data.table(job.id = job.id, lines = lines), "job.id", physical = FALSE)
}

# Returns the lines of `log` which have no job id assigned or belong to the job(s) in `id`.
extractLog = function(log, id) {
  job.id = NULL
  log[is.na(job.id) | job.id %in% id$job.id]$lines
}

#' @title Grep Log Files for a Pattern
#'
#' @description
#' Crawls through log files and reports jobs with lines matching the \code{pattern}.
#' See \code{\link{showLog}} for an example.
#'
#' @templateVar ids.default findStarted
#' @template ids
#' @param pattern [\code{character(1L)}]\cr
#'   Regular expression or string (see \code{fixed}).
#' @param ignore.case [\code{logical(1L)}]\cr
#'   If \code{TRUE} the match will be performed case insensitively.
#' @param fixed [\code{logical(1L)}]\cr
#'   If \code{FALSE} (default), \code{pattern} is a regular expression and a fixed string otherwise.
#' @template reg
#' @export
#' @family debug
#' @return [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id} and \dQuote{matches}.
grepLogs = function(ids = NULL, pattern, ignore.case = FALSE, fixed = FALSE, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  assertString(pattern, min.chars = 1L)
  assertFlag(ignore.case)
  assertFlag(fixed)
  job.id = job.hash = log.file = matches = NULL

  ids = convertIds(reg, ids, default = .findStarted(reg = reg))
  candidates = filter(reg$status[!is.na(job.hash)], ids)
  tab = candidates[, list(job.id = job.id, hash = sprintf("%s-%s", job.hash, log.file))]
  if (nrow(tab) == 0L)
    return(data.table(job.id = integer(0L), matches = character(0L)))

  # Sort by hash so that rows with the same hash are adjacent;
  # the log is only re-read when the hash changes
  setorderv(tab, "hash")
  res = data.table(job.id = tab$job.id, matches = NA_character_)
  detect = if (fixed) stri_detect_fixed else stri_detect_regex
  last.hash = ""

  for (row in seq_row(tab)) {
    cur.hash = tab$hash[row]
    if (last.hash != cur.hash) {
      log = readLog(tab[row], missing.as.empty = TRUE, reg = reg)
      last.hash = cur.hash
    }

    if (nrow(log) == 0L)
      next

    lines = extractLog(log, tab[row])
    hit = detect(lines, pattern, case_insensitive = ignore.case)
    if (any(hit))
      set(res, row, "matches", stri_flatten(lines[hit], "\n"))
  }

  # only report jobs with at least one matching line
  setkeyv(res[!is.na(matches)], "job.id")[]
}

#' @title Inspect Log Files
#'
#' @description
#' \code{showLog} opens the log in the pager. For customization, see \code{\link[base]{file.show}}.
#' \code{getLog} returns the log as character vector.
#' @template id
#' @template reg
#' @export
#' @family debug
#' @return Nothing.
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#'
#' # Create some dummy jobs
#' fun = function(i) {
#'   if (i == 3) stop(i)
#'   if (i %% 2 == 1) warning("That's odd.")
#' }
#' ids = batchMap(fun, i = 1:5, reg = tmp)
#' submitJobs(reg = tmp)
#' waitForJobs(reg = tmp)
#' getStatus(reg = tmp)
#'
#' writeLines(getLog(ids[1], reg = tmp))
#' \dontrun{
#' showLog(ids[1], reg = tmp)
#' }
#'
#' grepLogs(pattern = "warning", ignore.case = TRUE, reg = tmp)
showLog = function(id, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  id = convertId(reg, id)

  log = readLog(id, reg = reg)
  lines = extractLog(log, id)

  # write the extracted lines to a temporary file which is removed by the pager afterwards
  tmp.file = fs::path(fs::path_temp(), sprintf("%i.log", id$job.id))
  writeLines(text = lines, con = tmp.file)
  file.show(tmp.file, delete.file = TRUE)
}

#' @export
#' @rdname showLog
getLog = function(id, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  id = convertId(reg, id)
  log = readLog(id, reg = reg)
  extractLog(log, id)
}

================================================
FILE: R/Problem.R
================================================
#' @title Define Problems for Experiments
#'
#' @description
#' Problems may consist of up to two parts: A static, immutable part (\code{data} in \code{addProblem})
#' and a dynamic, stochastic part (\code{fun} in \code{addProblem}).
#' For example, for statistical learning problems a data frame would be the static problem part while
#' a resampling function would be the stochastic part which creates a problem instance.
#' This instance is then typically passed to a learning algorithm like a wrapper around a statistical model
#' (\code{fun} in \code{\link{addAlgorithm}}).
#'
#' This function serializes all components to the file system and registers the problem in the \code{\link{ExperimentRegistry}}.
#'
#' \code{removeProblems} removes all jobs from the registry which depend on the specific problem.
#' \code{reg$problems} holds the IDs of already defined problems. #' #' @param name [\code{character(1)}]\cr #' Unique identifier for the problem. #' @param data [\code{ANY}]\cr #' Static problem part. Default is \code{NULL}. #' @param fun [\code{function}]\cr #' The function defining the stochastic problem part. #' The static part is passed to this function with name \dQuote{data} and the \code{\link{Job}}/\code{\link{Experiment}} #' is passed as \dQuote{job}. #' Therefore, your function must have the formal arguments \dQuote{job} and \dQuote{data} (or dots \code{...}). #' If you do not provide a function, it defaults to a function which just returns the data part. #' @param seed [\code{integer(1)}]\cr #' Start seed for this problem. This allows the \dQuote{synchronization} of a stochastic #' problem across algorithms, so that different algorithms are evaluated on the same stochastic instance. #' If the problem seed is defined, the seeding mechanism works as follows: #' (1) Before the dynamic part of a problem is instantiated, #' the seed of the problem + [replication number] - 1 is set, i.e. the first #' replication uses the problem seed. (2) The stochastic part of the problem is #' instantiated. (3) From now on the usual experiment seed of the registry is used, #' see \code{\link{ExperimentRegistry}}. #' If \code{seed} is set to \code{NULL} (default), the job seed is used to instantiate the problem and #' different algorithms see different stochastic instances of the same problem. #' @param cache [\code{logical(1)}]\cr #' If \code{TRUE} and \code{seed} is set, problem instances will be cached on the file system. #' This assumes that each problem instance is deterministic for each combination of hyperparameter setting #' and each replication number. #' This feature is experimental. #' @template expreg #' @return [\code{Problem}]. Object of class \dQuote{Problem} (invisibly). 
#' @aliases Problem
#' @seealso \code{\link{Algorithm}}, \code{\link{addExperiments}}
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
#' addProblem("p1", fun = function(job, data) data, reg = tmp)
#' addProblem("p2", fun = function(job, data) job, reg = tmp)
#' addAlgorithm("a1", fun = function(job, data, instance) instance, reg = tmp)
#' addExperiments(repls = 2, reg = tmp)
#'
#' # List problems, algorithms and job parameters:
#' tmp$problems
#' tmp$algorithms
#' getJobPars(reg = tmp)
#'
#' # Remove one problem
#' removeProblems("p1", reg = tmp)
#'
#' # List problems and algorithms:
#' tmp$problems
#' tmp$algorithms
#' getJobPars(reg = tmp)
addProblem = function(name, data = NULL, fun = NULL, seed = NULL, cache = FALSE, reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE)
  assertString(name, min.chars = 1L)
  if (!stri_detect_regex(name, "^[[:alnum:]_.-]+$"))
    stopf("Illegal characters in problem name: %s", name)

  if (!is.null(fun)) {
    # either formals "job" and "data", or dots
    assert(checkFunction(fun, args = c("job", "data")), checkFunction(fun, args = "..."))
  } else {
    # default: the instance is just the static data part
    fun = function(job, data, ...) data
  }

  if (!is.null(seed)) {
    seed = asCount(seed, positive = TRUE)
    cache = assertFlag(cache)
  } else {
    # without a problem seed the cache is always disabled
    cache = FALSE
  }

  info("Adding problem '%s'", name)
  prob = setClasses(list(name = name, seed = seed, cache = cache, data = data, fun = fun), "Problem")
  writeRDS(prob, file = getProblemURI(reg, name), compress = reg$compress)
  reg$problems = union(reg$problems, name)

  # drop an existing cache directory of this problem, re-create it only if caching is enabled
  cache.dir = getProblemCacheDir(reg, name)
  if (fs::dir_exists(cache.dir))
    fs::dir_delete(cache.dir)
  if (cache)
    fs::dir_create(cache.dir)

  saveRegistry(reg)
  invisible(prob)
}

#' @export
#' @rdname addProblem
removeProblems = function(name, reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE, running.ok = FALSE)
  assertCharacter(name, any.missing = FALSE)
  assertSubset(name, reg$problems)

  problem = NULL
  for (nn in name) {
    # definitions using this problem and the jobs built on these definitions
    affected.defs = reg$defs[problem == nn, "def.id"]
    affected.jobs = filter(affected.defs, reg$status, "job.id")

    info("Removing Problem '%s' and %i corresponding jobs ...", nn, nrow(affected.jobs))
    file_remove(getProblemURI(reg, nn))

    reg$defs = reg$defs[!affected.defs]
    reg$status = reg$status[!affected.jobs]
    reg$problems = chsetdiff(reg$problems, nn)

    cache.dir = getProblemCacheDir(reg, nn)
    if (fs::dir_exists(cache.dir))
      fs::dir_delete(cache.dir)
  }

  sweepRegistry(reg)
  invisible(TRUE)
}

# Path of the file holding the serialized problem `name`.
getProblemURI = function(reg, name) {
  problem.dir = dir(reg, "problems")
  fs::path(problem.dir, mangle(name))
}

# Path of the cache directory of problem `name`; the directory name is the base32 encoded problem name.
getProblemCacheDir = function(reg, name) {
  encoded = base32_encode(name, use.padding = FALSE)
  fs::path(dir(reg, "cache"), "problems", encoded)
}

# Path of the cache file for the problem instance of `job`;
# the file name is a digest of problem name, problem parameters and replication number.
getProblemCacheURI = function(job) {
  hash = digest(list(job$prob.name, job$prob.pars, job$repl))
  fs::path(getProblemCacheDir(job, job$prob.name), sprintf("%s.rds", hash))
}

================================================
FILE: R/RDSReader.R
================================================
RDSReader = R6Class("RDSReader",
  cloneable = FALSE,
  public = list(
    cache = list(),
    use.cache = NA,

    initialize = function(use.cache = FALSE) {
      self$use.cache = use.cache
    },

    get = function(uri, slot = NA_character_) {
      # returns NULL for files which do not exist
      load = function(path) {
        if (fs::file_exists(path)) readRDS(path) else NULL
      }

      # caching disabled: always read from disk
      if (!self$use.cache)
        return(load(uri))

      # slotted: cache[[slot]] holds list(obj = [cached obj], uri = uri)
      # and is replaced as soon as a different uri is requested for the slot
      if (!is.na(slot)) {
        entry = self$cache[[slot]]
        if (is.null(entry) || entry$uri != uri)
          self$cache[[slot]] = list(obj = load(uri), uri = uri)
        return(self$cache[[slot]]$obj)
      }

      # not slotted: the object is cached under its uri
      if (!uri %chin% names(self$cache))
        self$cache[[uri]] = load(uri)
      self$cache[[uri]]
    }
  )
)

================================================
FILE: R/Registry.R
================================================
#' @title Registry Constructor
#'
#' @description
#' \code{makeRegistry} constructs the inter-communication object for all functions in \code{batchtools}.
#' All communication transactions are processed via the file system:
#' All information required to run a job is stored as \code{\link{JobCollection}} in a file in
#' a subdirectory of the \code{file.dir} directory.
#' Each job stores its results as well as computational status information (start time, end time, error message, ...)
#' also on the file system which is regularly parsed and merged by the master using \code{\link{syncRegistry}}.
#' After integrating the new information into the Registry, the Registry is serialized to the file system via \code{\link{saveRegistry}}.
#' Both \code{\link{syncRegistry}} and \code{\link{saveRegistry}} are called whenever required internally.
#' Therefore it should be safe to quit the R session at any time.
#' Work can later be resumed by calling \code{\link{loadRegistry}} which de-serializes the registry from
#' the file system.
#' #' The registry created last is saved in the package namespace (unless \code{make.default} is set to #' \code{FALSE}) and can be retrieved via \code{\link{getDefaultRegistry}}. #' #' Canceled jobs and jobs submitted multiple times may leave stray files behind. #' These can be swept using \code{\link{sweepRegistry}}. #' \code{\link{clearRegistry}} completely erases all jobs from a registry, including log files and results, #' and thus allows you to start over. #' #' @details #' Currently \pkg{batchtools} understands the following options set via the configuration file: #' \describe{ #' \item{\code{cluster.functions}:}{As returned by a constructor, e.g. \code{\link{makeClusterFunctionsSlurm}}.} #' \item{\code{default.resources}:}{List of resources to use. Will be overruled by resources specified via \code{\link{submitJobs}}.} #' \item{\code{temp.dir}:}{Path to directory to use for temporary registries.} #' \item{\code{sleep}:}{Custom sleep function. See \code{\link{waitForJobs}}.} #' \item{\code{expire.after}:}{Number of iterations before treating jobs as expired in \code{\link{waitForJobs}}.} #' \item{\code{compress}:}{Compression algorithm to use via \code{\link{saveRDS}}.} #' } #' #' @param file.dir [\code{character(1)}]\cr #' Path where all files of the registry are saved. #' Default is directory \dQuote{registry} in the current working directory. #' The provided path will get normalized unless it is given relative to the home directory #' (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well. #' #' If you pass \code{NA}, a temporary directory will be used. #' This way, you can create disposable registries for \code{\link{btlapply}} or examples. #' By default, the temporary directory \code{\link[base]{tempdir}()} will be used. #' If you want to use another directory, e.g. a directory which is shared between nodes, #' you can set it in your configuration file by setting the variable \code{temp.dir}. 
#' @param work.dir [\code{character(1)}]\cr #' Working directory for R process for running jobs. #' Defaults to the working directory currently set during Registry construction (see \code{\link[base]{getwd}}). #' \code{loadRegistry} uses the stored \code{work.dir}, but you may also explicitly overwrite it, #' e.g., after switching to another system. #' #' The provided path will get normalized unless it is given relative to the home directory #' (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well. #' @param conf.file [\code{character(1)}]\cr #' Path to a configuration file which is sourced while the registry is created. #' In the configuration file you can define how \pkg{batchtools} interacts with the system via \code{\link{ClusterFunctions}}. #' Separating the configuration of the underlying host system from the R code allows to easily move computation to another site. #' #' The file lookup is implemented in the internal (but exported) function \code{findConfFile} which returns the first file found of the following candidates: #' \enumerate{ #' \item{File \dQuote{batchtools.conf.R} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}.} #' \item{File \dQuote{batchtools.conf.R} in the current working directory.} #' \item{File \dQuote{config.R} in the user configuration directory as reported by \code{rappdirs::user_config_dir("batchtools", expand = FALSE)} (depending on OS, e.g., on linux this usually resolves to \dQuote{~/.config/batchtools/config.R}).} #' \item{\dQuote{.batchtools.conf.R} in the home directory (\dQuote{~}).} #' \item{\dQuote{config.R} in the site config directory as reported by \code{rappdirs::site_config_dir("batchtools")} (depending on OS). This file can be used for admins to set sane defaults for a computation site.} #' } #' Set to \code{NA} if you want to suppress reading any configuration file. 
#' If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. #' Therefore you can set and overwrite slots, e.g. \code{default.resources = list(walltime = 3600)} to set default resources or \dQuote{max.concurrent.jobs} to #' limit the number of jobs allowed to run simultaneously on the system. #' @param packages [\code{character}]\cr #' Packages that will always be loaded on each node. #' Uses \code{\link[base]{require}} internally. #' Default is \code{character(0)}. #' @param namespaces [\code{character}]\cr #' Same as \code{packages}, but the packages will not be attached. #' Uses \code{\link[base]{requireNamespace}} internally. #' Default is \code{character(0)}. #' @param source [\code{character}]\cr #' Files which should be sourced on the slaves prior to executing a job. #' Calls \code{\link[base]{sys.source}} using the \code{\link[base]{.GlobalEnv}}. #' @param load [\code{character}]\cr #' Files which should be loaded on the slaves prior to executing a job. #' Calls \code{\link[base]{load}} using the \code{\link[base]{.GlobalEnv}}. #' @param seed [\code{integer(1)}]\cr #' Start seed for jobs. Each job uses the (\code{seed} + \code{job.id}) as seed. #' Default is a random integer between 1 and 32768. #' Note that there is an additional seeding mechanism to synchronize instantiation of #' \code{\link{Problem}}s in a \code{\link{ExperimentRegistry}}. #' @param make.default [\code{logical(1)}]\cr #' If set to \code{TRUE}, the created registry is saved inside the package #' namespace and acts as default registry. You might want to switch this #' off if you work with multiple registries simultaneously. #' Default is \code{TRUE}. #' @return [\code{environment}] of class \dQuote{Registry} with the following slots: #' \describe{ #' \item{\code{file.dir} [path]:}{File directory.} #' \item{\code{work.dir} [path]:}{Working directory.} #' \item{\code{temp.dir} [path]:}{Temporary directory. 
Used if \code{file.dir} is \code{NA} to create temporary registries.} #' \item{\code{packages} [character()]:}{Packages to load on the slaves.} #' \item{\code{namespaces} [character()]:}{Namespaces to load on the slaves.} #' \item{\code{seed} [integer(1)]:}{Registry seed. Before each job is executed, the seed \code{seed + job.id} is set.} #' \item{\code{cluster.functions} [cluster.functions]:}{Usually set in your \code{conf.file}. Set via a call to \code{\link{makeClusterFunctions}}. See example.} #' \item{\code{default.resources} [named list()]:}{Usually set in your \code{conf.file}. Named list of default resources.} #' \item{\code{max.concurrent.jobs} [integer(1)]:}{Usually set in your \code{conf.file}. Maximum number of concurrent jobs for a single user and current registry on the system. #' \code{\link{submitJobs}} will try to respect this setting. The resource \dQuote{max.concurrent.jobs} has higher precedence.} #' \item{\code{defs} [data.table]:}{Table with job definitions (i.e. parameters).} #' \item{\code{status} [data.table]:}{Table holding information about the computational status. Also see \code{\link{getJobStatus}}.} #' \item{\code{resources} [data.table]:}{Table holding information about the computational resources used for the job. Also see \code{\link{getJobResources}}.} #' \item{\code{tags} [data.table]:}{Table holding information about tags. See \link{Tags}.} #' \item{\code{hash} [character(1)]:}{Unique hash which changes each time the registry gets saved to the file system. 
Can be utilized to invalidate the cache of \pkg{knitr}.}
#' }
#' @aliases Registry
#' @family Registry
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' print(tmp)
#'
#' # Set cluster functions to interactive mode and start jobs in external R sessions
#' tmp$cluster.functions = makeClusterFunctionsInteractive(external = TRUE)
#'
#' # Change packages to load
#' tmp$packages = c("MASS")
#' saveRegistry(reg = tmp)
makeRegistry = function(file.dir = "registry", work.dir = getwd(), conf.file = findConfFile(), packages = character(0L), namespaces = character(0L),
  source = character(0L), load = character(0L), seed = NULL, make.default = TRUE) {
  # --- argument checks ---
  assertString(file.dir, na.ok = TRUE)
  if (!is.na(file.dir))
    assertPathForOutput(file.dir, overwrite = FALSE)
  assertString(work.dir)
  assertDirectoryExists(work.dir, access = "r")
  assertString(conf.file, na.ok = TRUE)
  assertCharacter(packages, any.missing = FALSE, min.chars = 1L)
  assertCharacter(namespaces, any.missing = FALSE, min.chars = 1L)
  assertCharacter(source, any.missing = FALSE, min.chars = 1L)
  assertCharacter(load, any.missing = FALSE, min.chars = 1L)
  assertFlag(make.default)
  if (is.null(seed)) {
    seed = as.integer(runif(1L, 1, 32768))
  } else {
    seed = asCount(seed, positive = TRUE)
  }

  # --- plain settings ---
  reg = new.env(parent = asNamespace("batchtools"))
  reg$file.dir = file.dir
  reg$work.dir = work.dir
  reg$packages = packages
  reg$namespaces = namespaces
  reg$source = source
  reg$load = load
  reg$seed = seed
  reg$writeable = TRUE
  reg$version = packageVersion("batchtools")

  # --- empty, keyed tables ---
  reg$defs = data.table(
    def.id = integer(0L),
    job.pars = list(),
    key = "def.id")

  reg$status = data.table(
    job.id = integer(0L),
    def.id = integer(0L),
    submitted = double(0L),
    started = double(0L),
    done = double(0L),
    error = character(0L),
    mem.used = double(0L),
    resource.id = integer(0L),
    batch.id = character(0L),
    log.file = character(0L),
    job.hash = character(0L),
    job.name = character(0L),
    key = "job.id")

  reg$resources = data.table(
    resource.id = integer(0L),
    resource.hash = character(0L),
    resources = list(),
    key = "resource.id")

  reg$tags = data.table(
    job.id = integer(0L),
    tag = character(0L),
    key = "job.id")

  # --- system configuration and directories ---
  setSystemConf(reg, conf.file)

  # file.dir = NA requests a temporary registry below reg$temp.dir
  if (is.na(file.dir))
    reg$file.dir = fs::file_temp("registry", tmp_dir = reg$temp.dir)
  "!DEBUG [makeRegistry]: Creating directories in '`reg$file.dir`'"

  fs::dir_create(c(reg$file.dir, reg$work.dir))
  reg$file.dir = fs::path_abs(reg$file.dir)
  reg$work.dir = fs::path_abs(reg$work.dir)
  sub.dirs = c("jobs", "results", "updates", "logs", "exports", "external")
  fs::dir_create(fs::path(reg$file.dir, sub.dirs))

  with_dir(reg$work.dir, loadRegistryDependencies(reg))

  # --- finalize and store ---
  class(reg) = "Registry"
  saveRegistry(reg)
  reg$mtime = file_mtime(fs::path(reg$file.dir, "registry.rds"))
  reg$hash = rnd_hash()
  info("Created registry in '%s' using cluster functions '%s'", reg$file.dir, reg$cluster.functions$name)
  if (make.default)
    batchtools$default.registry = reg
  return(reg)
}

#' @export
print.Registry = function(x, ...) {
  backend = x$cluster.functions$name
  n.jobs = nrow(x$status)

  cat("Job Registry\n")
  catf(" Backend : %s", backend)
  catf(" File dir : %s", x$file.dir)
  catf(" Work dir : %s", x$work.dir)
  catf(" Jobs : %i", n.jobs)
  catf(" Seed : %i", x$seed)
  catf(" Writeable: %s", x$writeable)
}

#' @title assertRegistry
#'
#' @description
#' Assert that a given object is a \code{batchtools} registry.
#' Additionally can sync the registry, check if it is writeable, or check if jobs are running.
#' If any check fails, throws an error indicating the reason for the failure.
#'
#' @param reg [\code{\link{Registry}}]\cr
#'   The object asserted to be a \code{Registry}.
#' @param class [\code{character(1)}]\cr
#'   If \code{NULL} (default), \code{reg} must only inherit from class \dQuote{Registry}.
#'   Otherwise check that \code{reg} is of class \code{class}.
#'   E.g., if set to \dQuote{Registry}, a \code{\link{ExperimentRegistry}} would not pass.
#' @param writeable [\code{logical(1)}]\cr
#'   Check if the registry is writeable.
#' @param sync [\code{logical(1)}]\cr
#'   Try to synchronize the registry by including pending results from the file system.
#'   See \code{\link{syncRegistry}}.
#' @param running.ok [\code{logical(1)}]\cr
#'   If \code{FALSE} throw an error if jobs associated with the registry are currently running.
#' @return \code{TRUE} invisibly.
#' @export
assertRegistry = function(reg, class = NULL, writeable = FALSE, sync = FALSE, running.ok = TRUE) {
  # in debug mode, additionally verify that the internal tables are still keyed
  if (batchtools$debug) {
    if (!identical(key(reg$status), "job.id"))
      stop("Key of reg$status lost")
    if (!identical(key(reg$defs), "def.id"))
      stop("Key of reg$defs lost")
    if (!identical(key(reg$resources), "resource.id"))
      stop("Key of reg$resources lost")
  }

  if (is.null(class)) {
    assertClass(reg, "Registry")
  } else {
    assertString(class)
    assertClass(reg, class, ordered = TRUE)
  }
  assertFlag(writeable)
  assertFlag(sync)
  assertFlag(running.ok)

  # the registry file on disk is newer than the state of this session (1 second tolerance):
  # switch this session to read-only
  if (reg$writeable && file_mtime(fs::path(reg$file.dir, "registry.rds")) > reg$mtime + 1) {
    warning("Registry has been altered since last read. Switching to read-only mode in this session. See ?loadRegistry.")
    reg$writeable = FALSE
  }

  if (writeable && !reg$writeable)
    stop("Registry must be writeable. See ?loadRegistry.")

  if (!running.ok && nrow(.findOnSystem(reg = reg)) > 0L)
    stop("This operation is not allowed while jobs are running on the system")

  if (sync) {
    merged = sync(reg)
    if (length(merged)) {
      # store the registry first, then remove the files returned by sync()
      saveRegistry(reg)
      file_remove(merged)
    }
  }

  invisible(TRUE)
}

# Loads everything the registry `x` declares as dependency: attaches packages, loads namespaces,
# sources and loads files into the global environment and sets up the exported objects.
# Failures raise an error if must.work is TRUE and a warning otherwise.
loadRegistryDependencies = function(x, must.work = FALSE) {
  "!DEBUG [loadRegistryDependencies]: Starting ..."
  # package "methods" is always attached
  pkgs = union(x$packages, "methods")
  handler = if (must.work) stopf else warningf

  ok = vlapply(pkgs, require, character.only = TRUE)
  if (!all(ok))
    handler("Failed to load packages: %s", stri_flatten(pkgs[!ok], ", "))

  ok = vlapply(x$namespaces, requireNamespace)
  if (!all(ok))
    handler("Failed to load namespaces: %s", stri_flatten(x$namespaces[!ok], ", "))

  if (length(x$source) > 0L) {
    for (fn in x$source) {
      ok = try(sys.source(fn, envir = .GlobalEnv), silent = TRUE)
      if (is.error(ok))
        handler("Failed to source file '%s': %s", fn, as.character(ok))
    }
  }

  if (length(x$load) > 0L) {
    for (fn in x$load) {
      ok = try(load(fn, envir = .GlobalEnv), silent = TRUE)
      if (is.error(ok))
        handler("Failed to load file '%s': %s", fn, as.character(ok))
    }
  }

  # exported objects are assigned lazily: the rds file is only read on first access
  path = fs::path(x$file.dir, "exports")
  fns = list.files(path, pattern = "\\.rds$")
  if (length(fns) > 0L) {
    ee = .GlobalEnv
    Map(function(name, fn) {
      delayedAssign(x = name, value = readRDS(fn), assign.env = ee)
    }, name = unmangle(fns), fn = fs::path(path, fns))
  }

  invisible(TRUE)
}

================================================
FILE: R/Tags.R
================================================
#' @title Add or Remove Job Tags
#' @name Tags
#' @rdname Tags
#'
#' @description
#' Add and remove arbitrary tags to jobs.
#'
#' @templateVar ids.default all
#' @template ids
#' @param tags [\code{character}]\cr
#'   Tags to add or remove as strings. Each tag may consist of letters, numbers, underscore and dots (pattern \dQuote{^[[:alnum:]_.]+$}).
#' @return [\code{\link[data.table]{data.table}}] with job ids affected (invisible).
#' @template reg
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' ids = batchMap(sqrt, x = -3:3, reg = tmp)
#'
#' # Add new tag to all ids
#' addJobTags(ids, "needs.computation", reg = tmp)
#' getJobTags(reg = tmp)
#'
#' # Add more tags
#' addJobTags(findJobs(x < 0, reg = tmp), "x.neg", reg = tmp)
#' addJobTags(findJobs(x > 0, reg = tmp), "x.pos", reg = tmp)
#' getJobTags(reg = tmp)
#'
#' # Submit first 5 jobs and remove tag if successful
#' ids = submitJobs(1:5, reg = tmp)
#' if (waitForJobs(reg = tmp))
#'   removeJobTags(ids, "needs.computation", reg = tmp)
#' getJobTags(reg = tmp)
#'
#' # Grep for warning message and add a tag
#' addJobTags(grepLogs(pattern = "NaNs produced", reg = tmp), "div.zero", reg = tmp)
#' getJobTags(reg = tmp)
#'
#' # All tags where tag x.neg is set:
#' ids = findTagged("x.neg", reg = tmp)
#' getUsedJobTags(ids, reg = tmp)
addJobTags = function(ids = NULL, tags, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE)
  ids = convertIds(reg, ids, default = allIds(reg))
  assertCharacter(tags, any.missing = FALSE, pattern = "^[[:alnum:]_.]+$", min.len = 1L)

  # append one (job.id, tag) row per job for each tag ...
  for (cur in tags) {
    ids[, ("tag") := cur]
    reg$tags = rbind(reg$tags, ids)
  }
  # ... and drop duplicated rows afterwards
  deduped = unique(reg$tags, by = NULL)
  reg$tags = setkeyv(deduped, "job.id")

  saveRegistry(reg)
  invisible(ids[, "job.id"])
}

#' @export
#' @rdname Tags
removeJobTags = function(ids = NULL, tags, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE)
  ids = convertIds(reg, ids)
  assertCharacter(tags, any.missing = FALSE, pattern = "^[[:alnum:]_.]+$", min.len = 1L)
  job.id = tag = NULL

  # row numbers of the tag table to remove; without ids the tags are removed from all jobs
  hit = if (is.null(ids)) {
    reg$tags[tag %in% tags, which = TRUE]
  } else {
    reg$tags[job.id %in% ids$job.id & tag %in% tags, which = TRUE]
  }

  # nothing to remove: the registry is not touched
  if (length(hit) == 0L)
    return(invisible(noIds()))

  ids = unique(reg$tags[hit, "job.id"], by = "job.id")
  reg$tags = reg$tags[-hit]
  saveRegistry(reg)
  invisible(ids)
}

#' @export
#' @rdname Tags
getUsedJobTags = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg)
  ids = convertIds(reg, ids)
  tab = filter(reg$tags, ids)
  unique(tab, by = "tag")$tag
}

================================================
FILE: R/Worker.R
================================================
#' @title Create a Linux-Worker
#' @docType class
#' @format An \code{\link[R6]{R6Class}} generator object
#'
#' @description
#' \code{\link[R6]{R6Class}} to create local and remote linux workers.
#'
#' @field nodename Host name. Set via constructor.
#' @field ncpus Number of CPUs. Set via constructor and defaults to a heuristic which tries to detect the number of CPUs of the machine.
#' @field max.load Maximum load average (of the last 5 min). Set via constructor and defaults to the number of CPUs of the machine.
#' @field status Status of the worker; one of \dQuote{unknown}, \dQuote{available}, \dQuote{max.cpus} and \dQuote{max.load}.
#' @section Methods:
#' \describe{
#'   \item{\code{new(nodename, ncpus, max.load)}}{Constructor.}
#'   \item{\code{update(reg)}}{Update the worker status.}
#'   \item{\code{list(reg)}}{List running jobs.}
#'   \item{\code{start(reg, fn, outfile)}}{Start job collection in file \dQuote{fn} and output to \dQuote{outfile}.}
#'   \item{\code{kill(reg, batch.id)}}{Kill job matching the \dQuote{batch.id}.}
#' }
#' @return [\code{\link{Worker}}].
#' @export
#' @examples
#' \dontrun{
#' # create a worker for the local machine and use 4 CPUs.
#' Worker$new("localhost", ncpus = 4)
#' }
Worker = R6Class("Worker",
  cloneable = FALSE,
  public = list(
    nodename = NULL,
    ncpus = NULL,
    max.load = NULL,
    script = NULL,
    status = "unknown",

    # Constructor. Locates the helper script "linux-helper" (on the remote node if nodename is
    # not "localhost") and queries the number of CPUs via the helper unless `ncpus` is provided.
    initialize = function(nodename, ncpus = NULL, max.load = NULL) {
      if (testOS("windows"))
        stop("Windows is not supported by the Worker Class")

      self$nodename = assertString(nodename)
      if (!is.null(ncpus))
        ncpus = asCount(ncpus)
      if (!is.null(max.load))
        assertNumber(max.load)

      if (nodename == "localhost") {
        self$script = system.file("bin", "linux-helper", package = "batchtools")
      } else {
        # ask the R installation on the node for the location of the helper script;
        # the answer is wrapped in BOF/EOF markers so that it can be separated from other output
        args = c("-e", shQuote("message(\"[bt] --BOF--\\n\", \"[bt] \", system.file(\"bin/linux-helper\", package = \"batchtools\"), \"\\n[bt] --EOF--\\n\")"))
        res = runOSCommand("Rscript", args, nodename = nodename)
        script = private$filter_output(res)$output
        if (!testString(script, min.chars = 1L)) {
          stopf("Unable to locate helper script on SSH node '%s'. Is batchtools installed on the node?", nodename)
        }
        # store the remote path, all subsequent calls of private$run() depend on it
        self$script = script
      }

      self$ncpus = ncpus %??% as.integer(private$run("number-of-cpus")$output)
      self$max.load = max.load %??% self$ncpus
    },

    # Returns the output of the helper command "list-jobs", each element prefixed with "<nodename>#".
    list = function(reg) {
      stri_join(self$nodename, "#", stri_trim_both(private$run(c("list-jobs", reg$file.dir))$output))
    },

    # Runs the helper command "start-job" for the job collection file `fn` with output file `outfile`.
    start = function(reg, fn, outfile) {
      private$run(c("start-job", fn, outfile))
    },

    # Runs the helper command "kill-job"; the pid is the part of `batch.id` after the first "#".
    kill = function(reg, batch.id) {
      pid = stri_split_fixed(batch.id, "#", n = 2L)[[1L]][2L]
      cfKillJob(reg, self$script, c("kill-job", pid))
    },

    # Queries the helper command "status", updates self$status and returns the named numeric
    # vector (load, n.rprocs, n.rprocs.50, n.jobs) parsed from its output.
    update = function(reg) {
      "!DEBUG [Worker]: Updating Worker '`self$nodename`'"
      res = private$run(c("status", reg$file.dir))
      res = as.numeric(stri_split_regex(res$output, "\\s+")[[1L]])
      names(res) = c("load", "n.rprocs", "n.rprocs.50", "n.jobs")
      self$status = if (res["load"] > self$max.load) {
        "max.load"
      } else if (res["n.jobs"] >= self$ncpus) {
        "max.cpus"
      } else {
        "available"
      }
      return(res)
    }
  ),

  private = list(
    # Reduces res$output to the payload lines: lines starting with "[bt]" which are not one of the
    # two BOF/EOF markers, with the prefix removed. Raises an error unless exactly two markers are found.
    filter_output = function(res) {
      output = stri_trim_both(res$output)
      marker = stri_detect_regex(output, "^\\[bt\\] --[BE]OF--$")
      if (sum(marker) != 2L) {
        stopf("runOSCommand failed: Expected BOF+EOF markers for '%s %s', but got:\n %s", res$sys.cmd, stri_flatten(res$sys.args, " "), stri_flatten(res$output, "\n") %??% "")
      }
      info = stri_startswith_fixed(output, "[bt]") & !marker
      res$output = stri_trim_left(stri_sub(output[info], 5L))
      res
    },

    # Runs the helper script with `args` on the node of this worker and filters its output.
    run = function(args) {
      private$filter_output(runOSCommand(self$script, args, nodename = self$nodename))
    }
  )
)

================================================
FILE: R/addExperiments.R
================================================
#' @title Add Experiments to the Registry
#'
#' @description
#' Adds experiments (parametrized combinations of problems with algorithms) to the registry and thereby defines batch jobs.
#'
#' If multiple problem designs or algorithm designs are provided, they are combined via the Cartesian product.
#' E.g., if you have two problems \code{p1} and \code{p2} and three algorithms \code{a1}, \code{a2} and \code{a3},
#' \code{addExperiments} creates experiments for all parameters for the combinations \code{(p1, a1)}, \code{(p1, a2)},
#' \code{(p1, a3)}, \code{(p2, a1)}, \code{(p2, a2)} and \code{(p2, a3)}.
#'
#' @note
#' R's \code{data.frame} converts character vectors to factors by default in R versions prior to 4.0.0 which frequently resulted in problems using \code{addExperiments}.
#' Therefore, this function will warn about factor variables if the following conditions hold:
#' \enumerate{
#'   \item R version is < 4.0.0
#'   \item The design is passed as a \code{data.frame}, not a \code{\link[data.table]{data.table}} or \code{\link[tibble]{tibble}}.
#'   \item The option \dQuote{stringsAsFactors} is not set or set to \code{TRUE}.
#' }
#'
#' @param prob.designs [named list of \code{\link[base]{data.frame}}]\cr
#'   Named list of data frames (or \code{\link[data.table]{data.table}}).
#'   The name must match the problem name while the column names correspond to parameters of the problem.
#' If \code{NULL}, experiments for all defined problems without any parameters are added. #' @param algo.designs [named list of \code{\link[data.table]{data.table}} or \code{\link[base]{data.frame}}]\cr #' Named list of data frames (or \code{\link[data.table]{data.table}}). #' The name must match the algorithm name while the column names correspond to parameters of the algorithm. #' If \code{NULL}, experiments for all defined algorithms without any parameters are added. #' @param repls [\code{integer()}]\cr #' Number of replications for each problem design in `prob.designs` (automatically replicated to #' the correct length). #' @param combine [\code{character(1)}]\cr #' How to combine the rows of a single problem design with the rows of a single algorithm design? #' Default is \dQuote{crossprod} which combines each row of the problem design which each row of the algorithm design #' in a cross-product fashion. Set to \dQuote{bind} to just \code{\link[base]{cbind}} the tables of #' problem and algorithm designs where the shorter table is repeated if necessary. #' @template expreg #' @return [\code{\link[data.table]{data.table}}] with ids of added jobs stored in column \dQuote{job.id}. #' @export #' @family Experiment #' @examples #' \dontshow{ batchtools:::example_push_temp(1) } #' tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) #' #' # add first problem #' fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd) #' addProblem("rnorm", fun = fun, reg = tmp) #' #' # add second problem #' fun = function(job, data, n, lambda, ...) rexp(n, rate = lambda) #' addProblem("rexp", fun = fun, reg = tmp) #' #' # add first algorithm #' fun = function(instance, method, ...) if (method == "mean") mean(instance) else median(instance) #' addAlgorithm("average", fun = fun, reg = tmp) #' #' # add second algorithm #' fun = function(instance, ...) 
#'   sd(instance)
#' addAlgorithm("deviation", fun = fun, reg = tmp)
#'
#' # define problem and algorithm designs
#' library(data.table)
#' prob.designs = algo.designs = list()
#' prob.designs$rnorm = CJ(n = 100, mean = -1:1, sd = 1:5)
#' prob.designs$rexp = data.table(n = 100, lambda = 1:5)
#' algo.designs$average = data.table(method = c("mean", "median"))
#' algo.designs$deviation = data.table()
#'
#' # add experiments and submit
#' addExperiments(prob.designs, algo.designs, reg = tmp)
#'
#' # check what has been created
#' summarizeExperiments(reg = tmp)
#' unwrap(getJobPars(reg = tmp))
addExperiments = function(prob.designs = NULL, algo.designs = NULL, repls = 1L, combine = "crossprod", reg = getDefaultRegistry()) {
  # Convert every design to a data.table, warn about accidental factor columns
  # (plain data.frame on R < 4.0.0 with stringsAsFactors enabled) and reject
  # designs which use one of the reserved `keywords` as column name.
  convertDesigns = function(type, designs, keywords) {
    check.factors = getRversion() < "4.0.0" && default.stringsAsFactors()

    Map(function(id, design) {
      if (check.factors && identical(class(design)[1L], "data.frame")) {
        i = which(vlapply(design, is.factor))
        if (length(i) > 0L) {
          # The separator belongs to stri_flatten(); previously it was passed to
          # warningf() as a surplus format argument and the names were glued
          # together without any delimiter.
          warningf("%s design '%s' passed as 'data.frame' and 'stringsAsFactors' is TRUE. Column(s) '%s' may be encoded as factors accidentally.", type, id, stri_flatten(names(design)[i], "','"))
        }
      }
      if (!is.data.table(design))
        design = as.data.table(design)
      i = wf(keywords %chin% names(design))
      if (length(i) > 0L)
        stopf("%s design %s contains reserved keyword '%s'", type, id, keywords[i])
      design
    }, id = names(designs), design = designs)
  }

  # Next `n` free integer ids following the largest id in `ids`.
  increment = function(ids, n = 1L) {
    if (length(ids) == 0L) seq_len(n) else max(ids) + seq_len(n)
  }

  assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE)

  # NULL designs: one parameter-free design per registered problem/algorithm
  if (is.null(prob.designs)) {
    prob.designs = replicate(length(reg$problems), data.table(), simplify = FALSE)
    names(prob.designs) = reg$problems
  } else {
    assertList(prob.designs, types = "data.frame", names = "named")
    assertSubset(names(prob.designs), reg$problems)
    prob.designs = convertDesigns("Problem", prob.designs, c("job", "data"))
  }

  if (is.null(algo.designs)) {
    algo.designs = replicate(length(reg$algorithms), data.table(), simplify = FALSE)
    names(algo.designs) = reg$algorithms
  } else {
    assertList(algo.designs, types = "data.frame", names = "named")
    assertSubset(names(algo.designs), reg$algorithms)
    algo.designs = convertDesigns("Algorithm", algo.designs, c("job", "data", "instance"))
  }

  # one replication count per problem design
  repls = asInteger(repls, lower = 1L, any.missing = FALSE)
  repls = rep_len(repls, length(prob.designs))
  assertChoice(combine, c("crossprod", "bind"))

  all.ids = integer(0L)

  for (i in seq_along(prob.designs)) {
    pn = names(prob.designs)[i]
    pd = prob.designs[[i]]
    # a design without rows still yields one (parameter-free) experiment
    n.pd = max(nrow(pd), 1L)
    repls_cur = repls[i]

    for (j in seq_along(algo.designs)) {
      an = names(algo.designs)[j]
      ad = algo.designs[[j]]
      n.ad = max(nrow(ad), 1L)

      # idx maps each experiment to a row of the problem (.i) and algorithm (.j) design
      if (combine == "crossprod") {
        n.jobs = n.pd * n.ad * repls_cur
        info("Adding %i experiments ('%s'[%i] x '%s'[%i] x repls[%i]) ...", n.jobs, pn, n.pd, an, n.ad, repls_cur)
        idx = CJ(.i = seq_len(n.pd), .j = seq_len(n.ad))
      } else {
        recycle = max(n.pd, n.ad)
        n.jobs = recycle * repls_cur
        info("Adding %i experiments (('%s'[%i] | '%s'[%i]) x repls[%i]) ...", n.jobs, pn, n.pd, an, n.ad, repls_cur)
        idx = data.table(.i = rep_len(seq_len(n.pd), recycle), .j = rep_len(seq_len(n.ad), recycle))
      }

      # create temp tab with prob name, algo name and pars as list
      tab = data.table(
        problem = pn,
        algorithm = an,
        prob.pars = if (nrow(pd) > 0L) .mapply(list, pd[idx$.i], list()) else list(list()),
        algo.pars = if (nrow(ad) > 0L) .mapply(list, ad[idx$.j], list()) else list(list())
      )

      # create hash of each row of tab
      tab$pars.hash = calculateHash(tab)

      # merge with already defined experiments to get def.ids
      if (nrow(reg$defs) == 0L) {
        # this is no optimization, but fixes an strange error on r-devel/windows for merging empty data.tables
        tab$def.id = NA_integer_
      } else {
        tab = merge(reg$defs[, !c("problem", "algorithm", "prob.pars", "algo.pars")], tab, by = "pars.hash", all.x = FALSE, all.y = TRUE, sort = FALSE)
      }

      # generate def ids for new experiments
      w = which(is.na(tab$def.id))
      if (length(w) > 0L) {
        tab[w, "def.id" := increment(reg$defs$def.id, length(w))]
        reg$defs = rbind(reg$defs, tab[w])
      }

      # create rows in status table for new defs and each repl and filter for defined
      tab = CJ(def.id = tab$def.id, repl = seq_len(repls_cur))[!reg$status, on = c("def.id", "repl")]
      if (nrow(tab) < n.jobs)
        info("Skipping %i duplicated experiments ...", n.jobs - nrow(tab))

      if (nrow(tab) > 0L) {
        # rbind new status
        tab$job.id = increment(reg$status$job.id, nrow(tab))
        reg$status = rbind(reg$status, tab, fill = TRUE)
      }

      all.ids = c(all.ids, tab$job.id)
    }
  }

  # only touch keys and the file system if something was actually added
  if (length(all.ids)) {
    setkeyv(reg$defs, "def.id")
    setkeyv(reg$status, "job.id")
    saveRegistry(reg)
  }
  invisible(data.table(job.id = all.ids, key = "job.id"))
}

# Row-wise digest over problem name, algorithm name and both parameter lists of `tab`.
# Returns one hash per row.
calculateHash = function(tab) {
  cols = c("problem", "algorithm", "prob.pars", "algo.pars")
  unlist(.mapply(function(...) digest(list(...)), tab[, cols, with = FALSE], list()))
}

================================================
FILE: R/batchMap.R
================================================
#' @title Map Operation for Batch Systems
#'
#' @description
#' A parallel and asynchronous \code{\link[base]{Map}}/\code{\link[base]{mapply}} for batch systems.
#' Note that this function only defines the computational jobs.
#' The actual computation is started with \code{\link{submitJobs}}.
#' Results and partial results can be collected with \code{\link{reduceResultsList}}, \code{\link{reduceResults}} or
#' \code{\link{loadResult}}.
#'
#' For a synchronous \code{\link[base]{Map}}-like execution, see \code{\link{btmapply}}.
#'
#' @param fun [\code{function}]\cr
#' Function to map over arguments provided via \code{...}.
#' Parameters given via \code{args} or \code{...} are passed as-is, in the respective order and possibly named.
#' If the function has the named formal argument \dQuote{.job}, the \code{\link{Job}} is passed to the function
#' on the slave.
#' @param ... [ANY]\cr
#' Arguments to vectorize over (list or vector).
#' Shorter vectors will be recycled (possibly with a warning any length is not a multiple of the longest length).
#' Mutually exclusive with \code{args}.
#' Note that although it is possible to iterate over large objects (e.g., lists of data frames or matrices), this usually
#' hurts the overall performance and thus is discouraged.
#' @param args [\code{list} | \code{data.frame}]\cr
#' Arguments to vectorize over as (named) list or data frame.
#' Shorter vectors will be recycled (possibly with a warning any length is not a multiple of the longest length).
#' Mutually exclusive with \code{...}.
#' @template more.args
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] with ids of added jobs stored in column \dQuote{job.id}.
#' @export
#' @seealso \code{\link{batchReduce}}
#' @examples
#' \dontshow{ batchtools:::example_push_temp(3) }
#' # example using "..." and more.args
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' f = function(x, y) x^2 + y
#' ids = batchMap(f, x = 1:10, more.args = list(y = 100), reg = tmp)
#' getJobPars(reg = tmp)
#' testJob(6, reg = tmp) # 100 + 6^2 = 136
#'
#' # vector recycling
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' f = function(...) list(...)
#' ids = batchMap(f, x = 1:3, y = 1:6, reg = tmp)
#' getJobPars(reg = tmp)
#'
#' # example for an expand.grid()-like operation on parameters
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' ids = batchMap(paste, args = data.table::CJ(x = letters[1:3], y = 1:3), reg = tmp)
#' getJobPars(reg = tmp)
#' testJob(6, reg = tmp)
batchMap = function(fun, ..., args = list(), more.args = list(), reg = getDefaultRegistry()) {
  # Turn a list into a data.table. Lists without names get empty names first so
  # that as.data.table() does not create column names.
  asParTable = function(lst) {
    if (is.null(names(lst)))
      names(lst) = rep.int("", length(lst))
    as.data.table(lst)
  }

  assertRegistry(reg, class = "Registry", writeable = TRUE)
  if (nrow(reg$defs) > 0L)
    stop("Registry must be empty")
  assertFunction(fun)
  assert(checkList(args), checkDataFrame(args))
  assertList(more.args)

  # parameters are taken from 'args' or from '...', never from both
  if (length(args) == 0L) {
    pars = asParTable(list(...))
  } else {
    if (...length() > 0L)
      stop("You may only provide arguments via '...' *or* 'args'")
    pars = asParTable(args)
  }

  if (".job" %chin% names(pars))
    stop("Name '.job' not allowed as parameter name (reserved keyword)")

  # no rows or no columns: nothing to add
  if (any(dim(pars) == 0L))
    return(noIds())
  info("Adding %i jobs ...", nrow(pars))

  # persist the user function and, if present, the constant arguments
  fn.fun = fs::path(reg$file.dir, "user.function.rds")
  writeRDS(fun, file = fn.fun, compress = reg$compress)
  if (length(more.args) > 0L) {
    fn.args = fs::path(reg$file.dir, "more.args.rds")
    writeRDS(more.args, file = fn.args, compress = reg$compress)
  }

  # one definition and one status row per row of the parameter table
  job.ids = seq_row(pars)
  job.pars = .mapply(list, dots = pars, MoreArgs = list())

  reg$defs = data.table(
    def.id = job.ids,
    job.pars = job.pars,
    key = "def.id"
  )

  reg$status = data.table(
    job.id = job.ids,
    def.id = job.ids,
    submitted = NA_real_,
    started = NA_real_,
    done = NA_real_,
    error = NA_character_,
    mem.used = NA_real_,
    resource.id = NA_integer_,
    batch.id = NA_character_,
    log.file = NA_character_,
    job.hash = NA_character_,
    job.name = NA_character_,
    key = "job.id"
  )

  saveRegistry(reg)
  invisible(allIds(reg))
}

================================================
FILE: R/batchMapResults.R
================================================
#' @title Map Over Results to Create New Jobs
#'
#' @description
#' This function allows you to create new computational jobs (just like \code{\link{batchMap}} based on the results of
#' a \code{\link{Registry}}.
#'
#' @note
#' The URI to the result files in registry \code{source} is hard coded as parameter in the \code{target} registry.
#' This means that \code{target} is currently not portable between systems for computation.
#'
#' @templateVar ids.default findDone
#' @param fun [\code{function}]\cr
#' Function which takes the result as first (unnamed) argument.
#' @template ids
#' @param ... [ANY]\cr
#' Arguments to vectorize over (list or vector). Passed to \code{\link{batchMap}}.
#' @template more.args
#' @param target [\code{\link{Registry}}]\cr
#' Empty Registry where new jobs are created for.
#' @param source [\code{\link{Registry}}]\cr
#' Registry.
#' If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).
#' @return [\code{\link[data.table]{data.table}}] with ids of jobs added to \code{target}.
#' @export
#' @family Results
#' @examples
#' \dontshow{ batchtools:::example_push_temp(2) }
#' # Source registry: calculate square of some numbers
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(function(x) list(square = x^2), x = 1:10, reg = tmp)
#' submitJobs(reg = tmp)
#' waitForJobs(reg = tmp)
#'
#' # Target registry: calculate the square root on results of first registry
#' target = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMapResults(fun = function(x, y) list(sqrt = sqrt(x$square)), ids = 4:8,
#' target = target, source = tmp)
#' submitJobs(reg = target)
#' waitForJobs(reg = target)
#'
#' # Map old to new ids. First, get a table with results and parameters
#' results = unwrap(rjoin(getJobPars(reg = target), reduceResultsDataTable(reg = target)))
#' print(results)
#'
#' # Parameter '.id' points to job.id in 'source'. Use a inner join to combine:
#' ijoin(results, unwrap(reduceResultsDataTable(reg = tmp)), by = c(".id" = "job.id"))
batchMapResults = function(fun, ids = NULL, ..., more.args = list(), target, source = getDefaultRegistry()) {
  assertRegistry(source, sync = TRUE)
  assertRegistry(target, writeable = TRUE, sync = TRUE)
  assertFunction(fun)
  ids = convertIds(source, ids, default = .findDone(reg = source))
  assertList(more.args, names = "strict")

  if (nrow(target$status) > 0L)
    stop("Target registry 'target' must be empty")

  # result files of the source jobs, addressable by job id
  result.files = getResultFiles(source, ids)
  names(result.files) = ids$job.id

  # the wrapper gets the file lookup table and the user function as constant
  # arguments and the source job id as first vectorized argument
  batchMap(
    batchMapResultsWrapper,
    args = c(list(.id = ids$job.id), list(...)),
    more.args = c(list(.fn = result.files, .fun = fun), more.args),
    reg = target
  )
}

# Executed on the worker: look up the result file of source job `.id` in `.fn`,
# read it and call `.fun` with the result as first argument.
batchMapResultsWrapper = function(.fun, .fn, .id, ...) {
  key = as.character(.id)
  .fun(readRDS(.fn[[key]]), ...)
}

================================================
FILE: R/batchReduce.R
================================================
#' @title Reduce Operation for Batch Systems
#'
#' @description
#' A parallel and asynchronous \code{\link[base]{Reduce}} for batch systems.
#' Note that this function only defines the computational jobs.
#' Each job reduces a certain number of elements on one slave.
#' The actual computation is started with \code{\link{submitJobs}}.
#' Results and partial results can be collected with \code{\link{reduceResultsList}}, \code{\link{reduceResults}} or
#' \code{\link{loadResult}}.
#'
#' @param fun [\code{function(aggr, x, ...)}]\cr
#' Function to reduce \code{xs} with.
#' @param xs [\code{vector}]\cr
#' Vector to reduce.
#' @param init [ANY]\cr
#' Initial object for reducing. See \code{\link[base]{Reduce}}.
#' @param chunks [\code{integer(length(xs))}]\cr
#' Group for each element of \code{xs}. Can be generated with \code{\link{chunk}}.
#' @param more.args [\code{list}]\cr
#' A list of additional arguments passed to \code{fun}.
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] with ids of added jobs stored in column \dQuote{job.id}.
#' @export
#' @seealso \code{\link{batchMap}}
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' # define function to reduce on slave, we want to sum a vector
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' xs = 1:100
#' f = function(aggr, x) aggr + x
#'
#' # sum 20 numbers on each slave process, i.e.
#' # 5 jobs
#' chunks = chunk(xs, n.chunks = 5)
#' batchReduce(fun = f, 1:100, init = 0, chunks = chunks, reg = tmp)
#' submitJobs(reg = tmp)
#' waitForJobs(reg = tmp)
#'
#' # now reduce one final time on master
#' reduceResults(fun = function(aggr, job, res) f(aggr, res), reg = tmp)
batchReduce = function(fun, xs, init = NULL, chunks = seq_along(xs), more.args = list(), reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "Registry", writeable = TRUE)
  if (nrow(reg$defs) > 0L)
    stop("Registry must be empty")
  assertFunction(fun, c("aggr", "x"))
  assertAtomicVector(xs)
  assertIntegerish(chunks, len = length(xs), any.missing = FALSE, lower = 0L)
  assertList(more.args, names = "strict")

  # the reducer and its start value travel as constant arguments to the wrapper
  more.args = c(more.args, list(.fun = fun, .init = init))
  # one job per chunk; each job receives the elements of `xs` of its chunk
  batchMap(batchReduceWrapper, unname(split(xs, chunks)), more.args = more.args, reg = reg)
}

# Executed on the worker: fold `xs.block` with `.fun`, starting at `.init`.
# Everything in `...` (the user's more.args) is forwarded to `.fun` in each step.
batchReduceWrapper = function(xs.block, .fun, .init, ...) {
  fun = function(aggr, x) .fun(aggr, x, ...)
  Reduce(fun, xs.block, init = .init)
}

================================================
FILE: R/btlapply.R
================================================
#' @title Synchronous Apply Functions
#'
#' @description
#' This is a set of functions acting as counterparts to the sequential popular apply functions in base R:
#' \code{btlapply} for \code{\link[base]{lapply}} and \code{btmapply} for \code{\link[base]{mapply}}.
#'
#' Internally, jobs are created using \code{\link{batchMap}} on the provided registry.
#' If no registry is provided, a temporary registry (see argument \code{file.dir} of \code{\link{makeRegistry}}) and \code{\link{batchMap}}
#' will be used.
#' After all jobs are terminated (see \code{\link{waitForJobs}}), the results are collected and returned as a list.
#'
#' Note that these functions are only suitable for short and fail-safe operations
#' on batch system. If some jobs fail, you have to retrieve partial results from the
#' registry directory yourself.
#'
#' @param X [\code{\link[base]{vector}}]\cr
#' Vector to apply over.
#' @param fun [\code{function}]\cr
#' Function to apply.
#' @param more.args [\code{list}]\cr
#' Additional arguments passed to \code{fun}.
#' @param ... [\code{ANY}]\cr
#' Additional arguments passed to \code{fun} (\code{btlapply}) or vectors to map over (\code{btmapply}).
#' @inheritParams submitJobs
#' @param n.chunks [\code{integer(1)}]\cr
#' Passed to \code{\link{chunk}} before \code{\link{submitJobs}}.
#' @param chunk.size [\code{integer(1)}]\cr
#' Passed to \code{\link{chunk}} before \code{\link{submitJobs}}.
#' @template reg
#' @return [\code{list}] List with the results of the function call.
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' btlapply(1:3, function(x) x^2)
#' btmapply(function(x, y, z) x + y + z, x = 1:3, y = 1:3, more.args = list(z = 1), simplify = TRUE)
btlapply = function(X, fun, ..., resources = list(), n.chunks = NULL, chunk.size = NULL, reg = makeRegistry(file.dir = NA)) {
  assertVector(X)
  assertFunction(fun)
  assertRegistry(reg, class = "Registry", writeable = TRUE)

  # one job per element of X; everything in '...' is constant across jobs
  jobs = batchMap(fun, X, more.args = list(...), reg = reg)

  # chunk only if the caller asked for it
  want.chunks = !(is.null(n.chunks) && is.null(chunk.size))
  if (want.chunks)
    jobs$chunk = chunk(jobs$job.id, n.chunks = n.chunks, chunk.size = chunk.size)

  # submit, block until the jobs have terminated, then collect the results
  submitJobs(ids = jobs, resources = resources, reg = reg)
  waitForJobs(ids = jobs, reg = reg)
  reduceResultsList(ids = jobs, reg = reg)
}

#' @export
#' @param simplify [\code{logical(1)}]\cr
#' Simplify the results using \code{\link[base]{simplify2array}}?
#' @param use.names [\code{logical(1)}]\cr
#' Use names of the input to name the output?
#' @rdname btlapply
btmapply = function(fun, ..., more.args = list(), simplify = FALSE, use.names = TRUE, resources = list(), n.chunks = NULL, chunk.size = NULL, reg = makeRegistry(file.dir = NA)) {
  assertFunction(fun)
  assertFlag(simplify)
  assertFlag(use.names)
  assertRegistry(reg, class = "Registry", writeable = TRUE)

  jobs = batchMap(fun, ..., more.args = more.args, reg = reg)

  # chunk only if the caller asked for it
  want.chunks = !(is.null(n.chunks) && is.null(chunk.size))
  if (want.chunks)
    jobs$chunk = chunk(jobs$job.id, n.chunks = n.chunks, chunk.size = chunk.size)

  submitJobs(ids = jobs, resources = resources, reg = reg)
  waitForJobs(ids = jobs, reg = reg)
  res = reduceResultsList(ids = jobs, reg = reg)

  # name the results after the first vectorized argument: its names if it has
  # any, otherwise its values if it is a character vector
  if (use.names && ...length() > 0L) {
    first = list(...)[[1L]]
    nms = names(first)
    if (!is.null(nms)) {
      names(res) = nms
    } else if (is.character(first)) {
      names(res) = first
    }
  }

  if (simplify) simplify2array(res) else res
}

================================================
FILE: R/chunkIds.R
================================================
#' @title Chunk Jobs for Sequential Execution
#'
#' @description
#' Jobs can be partitioned into \dQuote{chunks} to be executed sequentially on the computational nodes.
#' Chunks are defined by providing a data frame with columns \dQuote{job.id} and \dQuote{chunk} (integer)
#' to \code{\link{submitJobs}}.
#' All jobs with the same chunk number will be grouped together on one node to form a single
#' computational job.
#'
#' The function \code{chunk} simply splits \code{x} into either a fixed number of groups, or
#' into a variable number of groups with a fixed number of maximum elements.
#'
#' The function \code{lpt} also groups \code{x} into a fixed number of chunks,
#' but uses the actual values of \code{x} in a greedy \dQuote{Longest Processing Time} algorithm.
#' As a result, the maximum sum of elements is minimized.
#'
#' \code{binpack} splits \code{x} into a variable number of groups whose sum of elements do
#' not exceed the upper limit provided by \code{chunk.size}.
#' #' See examples of \code{\link{estimateRuntimes}} for an application of \code{binpack} and \code{lpt}. #' #' @param x [\code{numeric}]\cr #' For \code{chunk} an atomic vector (usually the \code{job.id}). #' For \code{binpack} and \code{lpt}, the weights to group. #' @param chunk.size [\code{integer(1)}]\cr #' Requested chunk size for each single chunk. #' For \code{chunk} this is the number of elements in \code{x}, for \code{binpack} the size #' is determined by the sum of values in \code{x}. #' Mutually exclusive with \code{n.chunks}. #' @param n.chunks [\code{integer(1)}]\cr #' Requested number of chunks. #' The function \code{chunk} distributes the number of elements in \code{x} evenly while #' \code{lpt} tries to even out the sum of elements in each chunk. #' If more chunks than necessary are requested, empty chunks are ignored. #' Mutually exclusive with \code{chunk.size}. #' @param shuffle [\code{logical(1)}]\cr #' Shuffles the groups. Default is \code{TRUE}. #' @return [\code{integer}] giving the chunk number for each element of \code{x}.
#' @seealso \code{\link{estimateRuntimes}} #' @export #' @examples #' \dontshow{ batchtools:::example_push_temp(2) } #' ch = chunk(1:10, n.chunks = 2) #' table(ch) #' #' ch = chunk(rep(1, 10), chunk.size = 2) #' table(ch) #' #' set.seed(1) #' x = runif(10) #' ch = lpt(x, n.chunks = 2) #' sapply(split(x, ch), sum) #' #' set.seed(1) #' x = runif(10) #' ch = binpack(x, 1) #' sapply(split(x, ch), sum) #' #' # Job chunking #' tmp = makeRegistry(file.dir = NA, make.default = FALSE) #' ids = batchMap(identity, 1:25, reg = tmp) #' #' ### Group into chunks with 10 jobs each #' library(data.table) #' ids[, chunk := chunk(job.id, chunk.size = 10)] #' print(ids[, .N, by = chunk]) #' #' ### Group into 4 chunks #' ids[, chunk := chunk(job.id, n.chunks = 4)] #' print(ids[, .N, by = chunk]) #' #' ### Submit to batch system #' submitJobs(ids = ids, reg = tmp) #' #' # Grouped chunking #' tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) #' prob = addProblem(reg = tmp, "prob1", data = iris, fun = function(job, data) nrow(data)) #' prob = addProblem(reg = tmp, "prob2", data = Titanic, fun = function(job, data) nrow(data)) #' algo = addAlgorithm(reg = tmp, "algo", fun = function(job, data, instance, i, ...) 
#'   problem)
#' prob.designs = list(prob1 = data.table(), prob2 = data.table(x = 1:2))
#' algo.designs = list(algo = data.table(i = 1:3))
#' addExperiments(prob.designs, algo.designs, repls = 3, reg = tmp)
#'
#' ### Group into chunks of 5 jobs, but do not put multiple problems into the same chunk
#' # -> only one problem has to be loaded per chunk, and only once because it is cached
#' ids = getJobTable(reg = tmp)[, .(job.id, problem, algorithm)]
#' ids[, chunk := chunk(job.id, chunk.size = 5), by = "problem"]
#' ids[, chunk := .GRP, by = c("problem", "chunk")]
#' dcast(ids, chunk ~ problem)
chunk = function(x, n.chunks = NULL, chunk.size = NULL, shuffle = TRUE) {
  assertAtomicVector(x)
  # exactly one of the two sizing arguments must be given
  if (is.null(n.chunks) == is.null(chunk.size))
    stop("You must provide either 'n.chunks' (x)or 'chunk.size'")
  assertCount(n.chunks, positive = TRUE, null.ok = TRUE)
  assertCount(chunk.size, positive = TRUE, null.ok = TRUE)
  assertFlag(shuffle)

  n = length(x)
  if (n == 0L)
    return(integer(0L))

  # derive the number of chunks from the chunk size: ceiling(n / chunk.size)
  if (is.null(n.chunks))
    n.chunks = (n %/% chunk.size + (n %% chunk.size > 0L))

  # assign chunk numbers round-robin; never use more chunks than elements
  n.groups = min(n.chunks, n)
  chunks = as.integer(seq.int(0L, n - 1L) %% n.groups) + 1L

  if (shuffle) {
    chunks = sample(chunks)
  } else {
    chunks = sort(chunks)
  }
  return(chunks)
}

#' @rdname chunk
#' @useDynLib batchtools c_lpt
#' @export
lpt = function(x, n.chunks = 1L) {
  assertNumeric(x, lower = 0, any.missing = FALSE, finite = TRUE)
  assertCount(n.chunks, positive = TRUE)

  # the C routine receives the weights as doubles together with the
  # permutation which orders them decreasingly
  weights = as.double(x)
  by.size = order(weights, decreasing = TRUE)
  .Call(c_lpt, weights, by.size, as.integer(n.chunks))
}

#' @rdname chunk
#' @useDynLib batchtools c_binpack
#' @export
binpack = function(x, chunk.size = max(x)) {
  assertNumeric(x, lower = 0, any.missing = FALSE, finite = TRUE)
  assertNumber(chunk.size, lower = 0)
  if (length(x) == 0L)
    return(integer(0L))

  # the C routine receives the weights as doubles together with the
  # permutation which orders them decreasingly
  weights = as.double(x)
  by.size = order(weights, decreasing = TRUE)
  .Call(c_binpack, weights, by.size, as.double(chunk.size))
}

================================================
FILE: R/clearRegistry.R
================================================
#' @title Remove All Jobs
#' @description
#' Removes all jobs from a registry and calls \code{\link{sweepRegistry}}.
#'
#' @template reg
#' @family Registry
#' @export
clearRegistry = function(reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE, sync = TRUE, running.ok = FALSE)

  n.jobs = nrow(reg$status)
  info("Removing %i jobs ...", n.jobs)

  # drop all rows but keep the column layout of the tables
  reg$status = reg$status[FALSE]
  reg$defs = reg$defs[FALSE]
  reg$resources = reg$resources[FALSE]

  # the stored user function (written by batchMap) belongs to the removed jobs
  fn.fun = fs::path(reg$file.dir, "user.function.rds")
  if (fs::file_exists(fn.fun)) {
    info("Removing user function ...")
    file_remove(fn.fun)
  }

  sweepRegistry(reg = reg)
}

================================================
FILE: R/clusterFunctions.R
================================================
#' @title ClusterFunctions Constructor
#'
#' @description
#' This is the constructor used to create \emph{custom} cluster functions.
#' Note that some standard implementations for TORQUE, Slurm, LSF, SGE, etc. ship
#' with the package.
#'
#' @param name [\code{character(1)}]\cr
#' Name of cluster functions.
#' @param submitJob [\code{function(reg, jc, ...)}]\cr
#' Function to submit new jobs. Must return a \code{\link{SubmitJobResult}} object.
#' The arguments are \code{reg} (\code{\link{Registry}}) and \code{jobs} (\code{\link{JobCollection}}).
#' @param killJob [\code{function(reg, batch.id)}]\cr
#' Function to kill a job on the batch system. Make sure that you definitely kill the job! Return
#' value is currently ignored. Must have the arguments \code{reg} (\code{\link{Registry}}) and
#' \code{batch.id} (\code{character(1)} as returned by \code{submitJob}).
#' Note that there is a helper function \code{\link{cfKillJob}} to repeatedly try to kill jobs.
#' Set \code{killJob} to \code{NULL} if killing jobs cannot be supported.
#' @param listJobsQueued [\code{function(reg)}]\cr
#' List all queued jobs on the batch system for the current user.
#' Must return a character vector of batch ids, same format as they #' are returned by \code{submitJob}. #' Set \code{listJobsQueued} to \code{NULL} if listing of queued jobs is not supported. #' @param listJobsRunning [\code{function(reg)}]\cr #' List all running jobs on the batch system for the current user. #' Must return a character vector of batch ids, same format as they #' are returned by \code{submitJob}. It does not matter if you return a few job ids too many (e.g. #' all for the current user instead of all for the current registry), but you have to include all #' relevant ones. Must have the argument \code{reg} (\code{\link{Registry}}). #' Set \code{listJobsRunning} to \code{NULL} if listing of running jobs is not supported. #' @param array.var [\code{character(1)}]\cr #' Name of the environment variable set by the scheduler to identify IDs of job arrays. #' Default is \code{NA} for no array support. #' @param store.job.collection [\code{logical(1)}]\cr #' Flag to indicate that the cluster function implementation of \code{submitJob} can not directly handle \code{\link{JobCollection}} objects. #' If set to \code{TRUE}, the \code{\link{JobCollection}} is serialized to the file system before submitting the job. #' @param store.job.files [\code{logical(1)}]\cr #' Flag to indicate that job files need to be stored in the file directory. #' If set to \code{FALSE} (default), the job file is created in a temporary directory, otherwise (or if the debug mode is enabled) in #' the subdirectory \code{jobs} of the \code{file.dir}. #' @param scheduler.latency [\code{numeric(1)}]\cr #' Time to sleep after important interactions with the scheduler to ensure a sane state. #' Currently only triggered after calling \code{\link{submitJobs}}. #' @param fs.latency [\code{numeric(1)}]\cr #' Expected maximum latency of the file system, in seconds.
#' Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to
#' access files and directories.
#' Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.
#' @param hooks [\code{list}]\cr
#' Named list of functions which will be called on certain events like \dQuote{pre.submit} or \dQuote{post.sync}.
#' See \link{Hooks}.
#' @export
#' @aliases ClusterFunctions
#' @family ClusterFunctions
#' @family ClusterFunctionsHelper
makeClusterFunctions = function(name, submitJob, killJob = NULL, listJobsQueued = NULL, listJobsRunning = NULL,
  array.var = NA_character_, store.job.collection = FALSE, store.job.files = FALSE, scheduler.latency = 0,
  fs.latency = 0, hooks = list()) {
  # hooks must be uniquely named functions for events known to the package
  assertList(hooks, types = "function", names = "unique")
  assertSubset(names(hooks), unlist(batchtools$hooks, use.names = FALSE))

  # every component is validated while the list is assembled; the assertions
  # hand back their input on success
  cf = list(
    name = assertString(name, min.chars = 1L),
    submitJob = assertFunction(submitJob, c("reg", "jc"), null.ok = TRUE),
    killJob = assertFunction(killJob, c("reg", "batch.id"), null.ok = TRUE),
    listJobsQueued = assertFunction(listJobsQueued, "reg", null.ok = TRUE),
    listJobsRunning = assertFunction(listJobsRunning, "reg", null.ok = TRUE),
    array.var = assertString(array.var, na.ok = TRUE),
    store.job.collection = assertFlag(store.job.collection),
    store.job.files = assertFlag(store.job.files),
    scheduler.latency = assertNumber(scheduler.latency, lower = 0),
    fs.latency = assertNumber(fs.latency, lower = 0),
    hooks = hooks
  )
  setClasses(cf, "ClusterFunctions")
}

#' @export
print.ClusterFunctions = function(x, ...) {
  # an optional capability is available iff its function is not NULL
  available = function(f) !is.null(f)

  catf("ClusterFunctions for mode: %s", x$name)
  catf(" List queued Jobs : %s", available(x$listJobsQueued))
  catf(" List running Jobs: %s", available(x$listJobsRunning))
  catf(" Kill Jobs : %s", available(x$killJob))
  catf(" Hooks : %s", if (length(x$hooks)) stri_flatten(names(x$hooks), ",") else "-")
}

#' @title Create a SubmitJobResult
#'
#' @description
#' This function is only intended for use in your own cluster functions implementation.
#'
#' Use this function in your implementation of \code{\link{makeClusterFunctions}} to create a return
#' value for the \code{submitJob} function.
#'
#' @param status [\code{integer(1)}]\cr
#' Launch status of job. 0 means success, codes between 1 and 100 are temporary errors and any
#' error greater than 100 is a permanent failure.
#' @param batch.id [\code{character()}]\cr
#' Unique id of this job on batch system, as given by the batch system.
#' Must be globally unique so that the job can be terminated using just this information.
#' For array jobs, this may be a vector of length equal to the number of jobs in the array.
#' @param log.file [\code{character()}]\cr
#' Log file. If \code{NA}, defaults to \code{[job.hash].log}.
#' Some cluster functions set this for array jobs.
#' @param msg [\code{character(1)}]\cr
#' Optional error message in case \code{status} is not equal to 0. Default is \dQuote{OK},
#' \dQuote{TEMPERROR}, \dQuote{ERROR}, depending on \code{status}.
#' @return [\code{\link{SubmitJobResult}}]. A list, containing
#' \code{status}, \code{batch.id} and \code{msg}.
#' @family ClusterFunctionsHelper
#' @aliases SubmitJobResult
#' @export
makeSubmitJobResult = function(status, batch.id, log.file = NA_character_, msg = NA_character_) {
  status = asInt(status)
  # derive a default message from the status code if none was provided
  if (is.na(msg)) {
    msg = if (status == 0L) "OK" else if (status <= 100L) "TEMPERROR" else "ERROR"
  }
  # batch.id may be a vector (array jobs): join the ids into a single string.
  # paste0() has no 'sep' argument, it would append a comma to each id instead.
  "!DEBUG [makeSubmitJobResult]: Result for batch.id '`stri_flatten(batch.id, ',')`': `status` (`msg`)"
  setClasses(list(status = status, batch.id = batch.id, log.file = log.file, msg = msg), "SubmitJobResult")
}

#' @export
print.SubmitJobResult = function(x, ...) {
  cat("Job submission result\n")
  catf(" ID : %s", stri_flatten(x$batch.id, ","))
  catf(" Status: %i", x$status)
  catf(" Msg : %s", x$msg)
}

#' @title Cluster Functions Helper to Parse a Brew Template
#'
#' @description
#' This function is only intended for use in your own cluster functions implementation.
#'
#' Simply reads your template file and returns it as a single string.
#' Lines are trimmed, empty lines are dropped, and the remaining lines are joined with newlines.
#'
#' @param template [\code{character(1)}]\cr
#'   Path to template file which is then passed to \code{\link[brew]{brew}}.
#' @param comment.string [\code{character(1)}]\cr
#'   Ignore lines starting with this string.
#' @return [\code{character(1)}]. An exception is raised if no lines remain.
#' @family ClusterFunctionsHelper
#' @export
cfReadBrewTemplate = function(template, comment.string = NA_character_) {
  "!DEBUG [cfReadBrewTemplate]: Parsing template file '`template`'"
  lines = stri_trim_both(readLines(template))
  lines = lines[!stri_isempty(lines)]
  # comment detection happens after trimming, so indented comments are dropped as well
  if (!is.na(comment.string))
    lines = lines[!stri_startswith_fixed(lines, comment.string)]
  if (length(lines) == 0L)
    stopf("Error reading template '%s' or empty template", template)
  return(stri_flatten(lines, "\n"))
}

#' @title Cluster Functions Helper to Write Job Description Files
#'
#' @description
#' This function is only intended for use in your own cluster functions implementation.
#'
#' Calls brew silently on your template, any error will lead to an exception.
#' The file is stored at the same place as the corresponding job file in the \dQuote{jobs}-subdir
#' of your files directory.
#'
#' @template reg
#' @param text [\code{character(1)}]\cr
#'   String ready to be brewed. See \code{\link{cfReadBrewTemplate}} to read a template from the file system.
#' @param jc [\code{\link{JobCollection}}]\cr
#'   Will be used as environment to brew the template file in. See \code{\link{JobCollection}} for a list of all
#'   available variables.
#' @return [\code{character(1)}]. File path to brewed template file.
#' @family ClusterFunctionsHelper
#' @export
cfBrewTemplate = function(reg, text, jc) {
  assertString(text)

  # the brewed file is named after the job hash and placed in the registry's "jobs" directory
  job.file = sprintf("%s.job", jc$job.hash)
  outfile = fs::path(dir(reg, "jobs"), job.file)

  # make the batchtools namespace the enclosure of the job collection while
  # brewing; the empty environment is set again as soon as we leave
  parent.env(jc) = asNamespace("batchtools")
  on.exit(parent.env(jc) <- emptyenv())
  "!DEBUG [cfBrewTemplate]: Brewing template to file '`outfile`'"

  brewed = try(brew(text = text, output = outfile, envir = jc), silent = TRUE)
  if (is.error(brewed)) {
    stopf("Error brewing template: %s", as.character(brewed))
  }

  # wait for the file, passing the configured file system latency
  waitForFile(outfile, reg$cluster.functions$fs.latency)
  outfile
}

#' @title Cluster Functions Helper to Handle Unknown Errors
#'
#' @description
#' This function is only intended for use in your own cluster functions implementation.
#'
#' Simply constructs a \code{\link{SubmitJobResult}} object with status code 101, NA as batch id and
#' an informative error message containing the output of the OS command in \code{output}.
#'
#' @param cmd [\code{character(1)}]\cr
#'   OS command used to submit the job, e.g. qsub.
#' @param exit.code [\code{integer(1)}]\cr
#'   Exit code of the OS command, should not be 0.
#' @param output [\code{character}]\cr
#'   Output of the OS command, hopefully an informative error message.
#'   If these are multiple lines in a vector, they are automatically joined.
#' @return [\code{\link{SubmitJobResult}}].
#' @family ClusterFunctionsHelper
#' @export
cfHandleUnknownSubmitError = function(cmd, exit.code, output) {
  assertString(cmd, min.chars = 1L)
  exit.code = asInt(exit.code)
  assertCharacter(output, any.missing = FALSE)

  # multi-line output is collapsed into a single newline-separated string
  joined = stri_flatten(output, "\n")
  makeSubmitJobResult(
    status = 101L,
    batch.id = NA_character_,
    msg = sprintf("Command '%s' produced exit code %i. Output: '%s'", cmd, exit.code, joined)
  )
}

#' @title Cluster Functions Helper to Kill Batch Jobs
#'
#' @description
#' This function is only intended for use in your own cluster functions implementation.
#'
#' Calls the OS command to kill a job via \code{\link[base]{system}} like this: \dQuote{cmd batch.job.id}. If the
#' command returns an exit code > 0, the command is repeated after a 1 second sleep
#' \code{max.tries-1} times. If the command failed in all tries, an error is generated.
#'
#' @template reg
#' @param cmd [\code{character(1)}]\cr
#'   OS command, e.g. \dQuote{qdel}.
#' @param args [\code{character}]\cr
#'   Arguments to \code{cmd}, including the batch id.
#' @param max.tries [\code{integer(1)}]\cr
#'   Number of total times to try execute the OS command in cases of failures.
#'   Default is \code{3}.
#' @inheritParams runOSCommand
#' @return \code{TRUE} on success. An exception is raised otherwise.
#' @family ClusterFunctionsHelper
#' @export
cfKillJob = function(reg, cmd, args = character(0L), max.tries = 3L, nodename = "localhost") {
  assertString(cmd, min.chars = 1L)
  assertCharacter(args, any.missing = FALSE)
  assertString(nodename)
  # at least one attempt is required, otherwise there is no command result
  # to report in the final error message
  max.tries = asCount(max.tries, positive = TRUE)

  for (i in seq_len(max.tries)) {
    res = runOSCommand(cmd, args, nodename = nodename)
    if (res$exit.code == 0L)
      return(TRUE)
    # only pause if another attempt follows
    if (i < max.tries)
      Sys.sleep(1)
  }

  stopf("Really tried to kill job, but failed %i times with '%s'.\nMessage: %s",
    max.tries, stri_flatten(c(cmd, args), " "), stri_flatten(res$output, "\n"))
}

# Queries the cluster functions for running and/or queued batch ids.
#
# @param reg Registry providing `cluster.functions` and the `status` table.
# @param status One of "all", "running" or "queued".
# @return data.table with columns `batch.id` and `status`, restricted to
#   batch ids of jobs in `reg$status` which are submitted but not done.
getBatchIds = function(reg, status = "all") {
  cf = reg$cluster.functions
  tab = data.table(batch.id = character(0L), status = character(0L))
  batch.id = NULL

  if (status %chin% c("all", "running") && !is.null(cf$listJobsRunning)) {
    "!DEBUG [getBatchIds]: Getting running Jobs"
    x = unique(cf$listJobsRunning(reg))
    if (length(x) > 0L)
      tab = rbind(tab, data.table(batch.id = x, status = "running"))
  }

  if (status %chin% c("all", "queued") && !is.null(cf$listJobsQueued)) {
    "!DEBUG [getBatchIds]: Getting queued Jobs"
    # ids already reported as running are not listed a second time as queued
    x = chsetdiff(cf$listJobsQueued(reg), tab$batch.id)
    if (length(x) > 0L)
      tab = rbind(tab, data.table(batch.id = unique(x), status = "queued"))
  }

  # dummy bindings for the column names used in the data.table expressions below
  submitted = done = batch.id = NULL
  batch.ids = reg$status[!is.na(submitted) & is.na(done) & !is.na(batch.id), unique(batch.id)]
  tab[batch.id %in% batch.ids]
}

#' @title Find a batchtools Template File
#'
#' @description
#' This function returns the path to a template file on the file system.
#' @template template
#' @return [\code{character}] Path to the file or \code{NA} if no template file was found.
#' @keywords internal
#' @export
findTemplateFile = function(template) {
  assertString(template, min.chars = 1L)
  readable = function(path) fs::file_access(path, "read")

  # anything ending on ".tmpl" is treated as a path and must exist
  if (stri_endswith_fixed(template, ".tmpl")) {
    assertFileExists(template, access = "r")
    return(fs::path_abs(template))
  }

  short.name = sprintf("%s.tmpl", template)
  long.name = sprintf("batchtools.%s.tmpl", template)

  # 1. directory given by the environment variable R_BATCHTOOLS_SEARCH_PATH
  search.path = Sys.getenv("R_BATCHTOOLS_SEARCH_PATH")
  if (nzchar(search.path)) {
    candidate = fs::path(search.path, long.name)
    if (readable(candidate))
      return(fs::path_abs(candidate))
  }

  # 2. current working directory
  if (readable(long.name))
    return(fs::path_abs(long.name))

  # 3. user configuration directory
  candidate = fs::path(user_config_dir("batchtools", expand = FALSE), short.name)
  if (readable(candidate))
    return(candidate)

  # 4. hidden file in the home directory
  candidate = fs::path("~", sprintf(".batchtools.%s.tmpl", template))
  if (readable(candidate))
    return(fs::path_abs(candidate))

  # 5. site configuration directory
  candidate = fs::path(site_config_dir("batchtools"), short.name)
  if (readable(candidate))
    return(candidate)

  # 6. templates shipped with the package
  candidate = system.file("templates", short.name, package = "batchtools")
  if (readable(candidate))
    return(candidate)

  NA_character_
}

================================================
FILE: R/clusterFunctionsDocker.R
================================================
#' @title ClusterFunctions for Docker
#'
#' @description
#' Cluster functions for Docker/Docker Swarm (\url{https://docs.docker.com/engine/swarm/}).
#'
#' The \code{submitJob} function executes
#' \code{docker [docker.args] run --detach=true [image.args] [resources] [image] [cmd]}.
#' Arguments \code{docker.args}, \code{image.args} and \code{image} can be set on construction.
#' The \code{resources} part takes the named resources \code{ncpus} and \code{memory}
#' from \code{\link{submitJobs}} and maps them to the arguments \code{--cpu-shares} and \code{--memory}
#' (in Megabytes). The resource \code{threads} is mapped to the environment variables \dQuote{OMP_NUM_THREADS}
#' and \dQuote{OPENBLAS_NUM_THREADS}.
#' To reliably identify jobs in the swarm, jobs are labeled with \dQuote{batchtools=[job.hash]} and named
#' using the current login name (label \dQuote{user}) and the job hash (label \dQuote{batchtools}).
#'
#' \code{listJobsRunning} uses \code{docker [docker.args] ps --format=\{\{.ID\}\}} to filter for running jobs.
#'
#' \code{killJob} uses \code{docker [docker.args] kill [batch.id]} to terminate a running job.
#'
#' These cluster functions use a \link{Hook} to remove finished jobs after each submit and every time the \link{Registry}
#' is synchronized (using \code{\link{syncRegistry}}).
#' This is currently required because docker does not remove terminated containers automatically.
#' Use \code{docker ps -a --filter 'label=batchtools' --filter 'status=exited'} to identify and remove terminated
#' containers manually (or use a cron job).
#'
#' @param image [\code{character(1)}]\cr
#'   Name of the docker image to run.
#' @param docker.args [\code{character}]\cr
#'   Additional arguments passed to \dQuote{docker} *before* the command (\dQuote{run}, \dQuote{ps} or \dQuote{kill}) to execute (e.g., the docker host).
#' @param image.args [\code{character}]\cr
#'   Additional arguments passed to \dQuote{docker run} (e.g., to define mounts or environment variables).
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsDocker = function(image, docker.args = character(0L), image.args = character(0L), scheduler.latency = 1, fs.latency = 65) { # nocov start
  assertString(image)
  assertCharacter(docker.args, any.missing = FALSE)
  assertCharacter(image.args, any.missing = FALSE)
  user = Sys.info()["user"]

  # Starts a detached container which runs the job collection.
  # Returns a SubmitJobResult; the batch id is the short (12 character) container id.
  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")
    assertIntegerish(jc$resources$ncpus, lower = 1L, any.missing = FALSE, .var.name = "resources$ncpus")
    assertIntegerish(jc$resources$memory, lower = 1L, any.missing = FALSE, .var.name = "resources$memory")
    # an optional walltime is enforced by prefixing the command with 'timeout'
    timeout = if (is.null(jc$resources$walltime)) character(0L) else sprintf("timeout %i", asInt(jc$resources$walltime, lower = 0L))

    cmd = c("docker", docker.args, "run", "--detach=true", image.args,
      sprintf("-e DEBUGME='%s'", Sys.getenv("DEBUGME")),
      sprintf("-e OMP_NUM_THREADS=%i", jc$resources$omp.threads %??% jc$resources$threads),
      sprintf("-e OPENBLAS_NUM_THREADS=%i", jc$resources$blas.threads %??% jc$resources$threads),
      sprintf("-e MKL_NUM_THREADS=%i", jc$resources$blas.threads %??% jc$resources$threads),
      sprintf("-c %i", jc$resources$ncpus),
      sprintf("-m %im", jc$resources$memory),
      sprintf("--memory-swap %im", jc$resources$memory),
      sprintf("--label batchtools=%s", jc$job.hash),
      sprintf("--label user=%s", user),
      sprintf("--name=%s_bt_%s", user, jc$job.hash),
      image, timeout, "Rscript", stri_join("-e", shQuote(sprintf("batchtools::doJobCollection('%s', '%s')", jc$uri, jc$log.file)), sep = " "))
    res = runOSCommand(cmd[1L], cmd[-1L])

    if (res$exit.code > 0L) {
      housekeeping(reg)
      no.res.msg = "no resources available"
      # missing resources are reported as a temporary error (status 1)
      if (res$exit.code == 1L && any(stri_detect_fixed(res$output, no.res.msg)))
        return(makeSubmitJobResult(status = 1L, batch.id = NA_character_, msg = no.res.msg))
      return(cfHandleUnknownSubmitError(stri_flatten(cmd, " "), res$exit.code, res$output))
    }

    # The output must contain exactly one line which is a 64 character container id.
    # The check is unconditional: the former guard 'length(res$output != 1L)' had a
    # misplaced parenthesis and let empty output through as an empty batch id.
    matches = which(stri_detect_regex(res$output, "^[[:alnum:]]{64}$"))
    if (length(matches) != 1L)
      stopf("Command '%s' did not return a long UUID identitfier", stri_flatten(cmd, " "))
    makeSubmitJobResult(status = 0L, batch.id = stri_sub(res$output[matches], 1L, 12L))
  }

  # Lists ids of containers carrying the 'batchtools' label, optionally
  # narrowed down by additional 'docker ps' arguments in 'filter'.
  listJobs = function(reg, filter = character(0L)) {
    assertRegistry(reg, writeable = FALSE)
    # use a workaround for DockerSwarm: docker ps does not list all jobs correctly, only
    # docker inspect reports the status correctly
    args = c(docker.args, "ps", "--format={{.ID}}", "--filter 'label=batchtools'", filter)
    res = runOSCommand("docker", args)
    if (res$exit.code > 0L)
      OSError("Listing of jobs failed", res)
    # drop empty lines; this is vector-safe, whereas '||' with a vector on the
    # right hand side fails as soon as more than one container is listed
    res$output[nzchar(res$output)]
  }

  # Removes exited containers which belong to this registry.
  housekeeping = function(reg, ...) {
    batch.ids = chintersect(listJobs(reg, "--filter 'status=exited'"), reg$status$batch.id)
    if (length(batch.ids) > 0L)
      runOSCommand("docker", c(docker.args, "rm", batch.ids))
    invisible(TRUE)
  }

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    cfKillJob(reg, "docker", c(docker.args, "kill", batch.id))
  }

  listJobsRunning = function(reg) {
    assertRegistry(reg, writeable = FALSE)
    # containers are tagged with '--label user=<user>' on submit, so the
    # matching 'docker ps' filter is 'label=user=<user>'
    listJobs(reg, sprintf("--filter 'label=user=%s'", user))
  }

  makeClusterFunctions(name = "Docker", submitJob = submitJob, killJob = killJob, listJobsRunning = listJobsRunning,
    store.job.collection = TRUE, scheduler.latency = scheduler.latency, fs.latency = fs.latency,
    hooks = list(post.submit = housekeeping, post.sync = housekeeping))
} # nocov end

================================================
FILE: R/clusterFunctionsHyperQueue.R
================================================
#' @title ClusterFunctions for HyperQueue
#'
#' @description
#' Cluster functions for HyperQueue (\url{https://it4innovations.github.io/hyperqueue/stable/}).
#'
#' Jobs are submitted via the HyperQueue CLI using \code{hq submit} and executed by calling \code{Rscript -e "batchtools::doJobCollection(...)"}.
#' The job name is set to the job hash and logs are handled internally by batchtools.
#' Listing jobs uses \code{hq job list} and cancelling jobs uses \code{hq job cancel}.
#' A running HyperQueue server and workers are required.
#'
#'
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsHyperQueue = function(scheduler.latency = 1, fs.latency = 65) {
  # Submits the job collection via 'hq submit'.
  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    # resource flags are only passed if the respective resource is set;
    # NULL entries vanish when the argument vector is assembled
    ncpus = if (is.null(jc$resources$ncpus)) NULL else sprintf("--cpus=%i", jc$resources$ncpus)
    memory = if (is.null(jc$resources$memory)) NULL else sprintf("--resource mem=%iMiB", jc$resources$memory)
    # time-limit is the maximum time the job can run, time-request is the minimum remaining lifetime a worker must have
    walltime = if (is.null(jc$resources$walltime)) NULL else sprintf("--time-limit=%is --time-request=%is", jc$resources$walltime, jc$resources$walltime)

    rscript.call = shQuote(sprintf("batchtools::doJobCollection('%s', '%s')", jc$uri, jc$log.file))
    args = c(
      "submit",
      sprintf("--name=%s", jc$job.hash),
      # hyperqueue cannot write stdout and stderr to the same file
      "--stdout=none", "--stderr=none",
      ncpus, memory, walltime,
      "--", "Rscript", "-e", rscript.call
    )

    res = runOSCommand("hq", args)
    if (res$exit.code > 0L)
      return(cfHandleUnknownSubmitError("hq", res$exit.code, res$output))

    # the job id is taken from the "job ID: <number>" part of the output
    batch_ids = sub(".*job ID: ([0-9]+).*", "\\1", res$output)
    makeSubmitJobResult(status = 0L, batch.id = batch_ids)
  }

  # Cancels a job via 'hq job cancel'.
  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    res = runOSCommand("hq", c("job", "cancel", batch.id))
    if (res$exit.code > 0L)
      OSError("Killing of job failed", res)
    makeSubmitJobResult(status = 0L, batch.id = batch.id)
  }

  # Shared implementation of both listings: asks 'hq job list' for jobs in
  # the given state and returns their ids as character vector.
  listJobs = function(reg, state) {
    requireNamespace("jsonlite")
    assertRegistry(reg, writeable = FALSE)
    res = runOSCommand("hq", c("job", "list", "--filter", state, "--output-mode", "json"))
    if (res$exit.code > 0L)
      OSError("Listing of jobs failed", res)
    as.character(jsonlite::fromJSON(res$output)$id)
  }

  listJobsQueued = function(reg) listJobs(reg, "waiting")

  listJobsRunning = function(reg) listJobs(reg, "running")

  makeClusterFunctions(name = "HyperQueue", submitJob = submitJob, killJob = killJob,
    listJobsRunning = listJobsRunning, listJobsQueued = listJobsQueued,
    store.job.collection = TRUE, scheduler.latency = scheduler.latency, fs.latency = fs.latency)
}

================================================
FILE: R/clusterFunctionsInteractive.R
================================================
#' @title ClusterFunctions for Sequential Execution in the Running R Session
#'
#' @description
#' All jobs are executed sequentially using the current R process in which \code{\link{submitJobs}} is called.
#' Thus, \code{submitJob} blocks the session until the job has finished.
#' The main use of this \code{ClusterFunctions} implementation is to test and debug programs on a local computer.
#'
#' Listing jobs returns an empty vector (as no jobs can be running when you call this)
#' and \code{killJob} is not implemented for the same reasons.
#'
#' @param external [\code{logical(1)}]\cr
#'   If set to \code{TRUE}, jobs are started in a fresh R session instead of currently active but still
#'   waits for its termination.
#'   Default is \code{FALSE}.
#' @param write.logs [\code{logical(1)}]\cr
#'   Sink the output to log files. Turning logging off can increase the speed of
#'   calculations but makes it very difficult to debug.
#'   Default is \code{TRUE}.
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsInteractive = function(external = FALSE, write.logs = TRUE, fs.latency = 0) {
  assertFlag(external)
  # NOTE(review): 'write.logs' is validated but not used below, the log file
  # is always passed on as output -- confirm whether this is intended
  assertFlag(write.logs)

  # Runs the job collection and blocks until it has finished.
  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    if (!external) {
      # execute in the current R session
      doJobCollection(jc, output = jc$log.file)
    } else {
      # execute in a fresh R process which reads the job collection from its uri
      rscript.args = sprintf("-e \"batchtools::doJobCollection('%s', output = '%s')\"", jc$uri, jc$log.file)
      runOSCommand(Rscript(), rscript.args)
    }

    makeSubmitJobResult(status = 0L, batch.id = "cfInteractive")
  }

  # the job collection is only stored if an external process is used
  makeClusterFunctions(name = "Interactive", submitJob = submitJob, store.job.collection = external, fs.latency = fs.latency)
}

================================================
FILE: R/clusterFunctionsLSF.R
================================================
#' @title ClusterFunctions for LSF Systems
#'
#' @description
#' Cluster functions for LSF (\url{https://www.ibm.com/products/hpc-workload-management}).
#'
#' Job files are created based on the brew template \code{template}. This
#' file is processed with brew and then submitted to the queue using the
#' \code{bsub} command. Jobs are killed using the \code{bkill} command and the
#' list of running jobs is retrieved using \code{bjobs -u $USER -w}. The user
#' must have the appropriate privileges to submit, delete and list jobs on the
#' cluster (this is usually the case).
#'
#' The template file can access all resources passed to \code{\link{submitJobs}}
#' as well as all variables stored in the \code{\link{JobCollection}}.
#' It is the template file's job to choose a queue for the job and handle the desired resource
#' allocations.
#'
#' @note
#' Array jobs are currently not supported.
#'
#' @template template
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsLSF = function(template = "lsf", scheduler.latency = 1, fs.latency = 65) { # nocov start
  # keep the user-supplied value in 'template' so that the error message can
  # report it; findTemplateFile() returns NA if nothing was found
  template.file = findTemplateFile(template)
  if (testScalarNA(template.file))
    stopf("Argument 'template' (=\"%s\") must point to a readable template file or contain the template itself as string (containing at least one newline)", template)
  template = cfReadBrewTemplate(template.file)

  # When LSB_BJOBS_CONSISTENT_EXIT_CODE = Y, the bjobs command exits with 0 only
  # when unfinished jobs are found, and 255 when no jobs are found,
  # or a non-existent job ID is entered.
  Sys.setenv(LSB_BJOBS_CONSISTENT_EXIT_CODE = "Y")

  # Brews the job file and pipes it to 'bsub'.
  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    outfile = cfBrewTemplate(reg, template, jc)
    # the path is quoted as in the OpenLava cluster functions, otherwise the
    # redirection breaks for paths containing spaces
    res = runOSCommand("bsub", stdin = shQuote(outfile))

    if (res$exit.code > 0L) {
      cfHandleUnknownSubmitError("bsub", res$exit.code, res$output)
    } else {
      # the batch id is the first number in the output of bsub
      batch.id = stri_extract_first_regex(stri_flatten(res$output, " "), "\\d+")
      makeSubmitJobResult(status = 0L, batch.id = batch.id)
    }
  }

  # Calls 'bjobs' with the given arguments and extracts the job ids.
  listJobs = function(reg, args) {
    assertRegistry(reg, writeable = FALSE)
    res = runOSCommand("bjobs", args)
    if (res$exit.code > 0L) {
      # exit code 255 or a "No ... job found" message: nothing to list
      if (res$exit.code == 255L || any(stri_detect_regex(res$output, "No (unfinished|pending|running) job found")))
        return(character(0L))
      OSError("Listing of jobs failed", res)
    }
    # skip the first output line, then take the first number of each remaining line
    stri_extract_first_regex(tail(res$output, -1L), "\\d+")
  }

  listJobsQueued = function(reg) {
    listJobs(reg, c("-u $USER", "-w", "-p"))
  }

  listJobsRunning = function(reg) {
    listJobs(reg, c("-u $USER", "-w", "-r"))
  }

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    cfKillJob(reg, "bkill", batch.id)
  }

  makeClusterFunctions(name = "LSF", submitJob = submitJob, killJob = killJob, listJobsQueued = listJobsQueued,
    listJobsRunning = listJobsRunning, store.job.collection = TRUE, scheduler.latency = scheduler.latency, fs.latency = fs.latency)
} # nocov end
================================================
FILE: R/clusterFunctionsMulticore.R
================================================
if (getRversion() < "3.3.2" && .Platform$OS.type != "windows") {
  # Provided patch for upstream which is shipped with R >= 3.3.2:
  # https://stat.ethz.ch/pipermail/r-devel/2016-August/073035.html
  selectChildren = getFromNamespace("selectChildren", "parallel")
  readChild = getFromNamespace("readChild", "parallel")

  # Collects the results of all children in 'pids' which have something to report.
  # Returns NULL if there is nothing to collect.
  mccollect = function(pids, timeout = 0) {
    if (length(pids) == 0L)
      return(NULL)
    if (!is.integer(pids))
      stop("invalid 'jobs' argument")

    ready = selectChildren(pids, timeout)
    if (is.logical(ready) || length(ready) == 0L)
      return(NULL)

    # raw results are unserialized, everything else is mapped to NULL
    out = lapply(ready, function(child) {
      payload = readChild(child)
      if (is.raw(payload)) unserialize(payload) else NULL
    })
    names(out) = as.character(pids)[match(ready, pids)]
    out
  }
} else {
  # thin wrapper around the non-blocking variant of parallel::mccollect()
  mccollect = function(jobs, timeout = 0) {
    parallel::mccollect(jobs, wait = FALSE, timeout = timeout)
  }
}

# Bookkeeping for processes forked via parallel::mcparallel().
# Field 'jobs' is a table of tracked process ids, 'ncpus' the maximum number
# of processes tracked at the same time.
Multicore = R6Class("Multicore",
  cloneable = FALSE,
  public = list(
    jobs = NULL,
    ncpus = NA_integer_,

    initialize = function(ncpus) {
      self$jobs = data.table(pid = integer(0L), count = integer(0L))
      self$ncpus = ncpus
      # collect the tracked children when the object is garbage collected
      reg.finalizer(self, function(e) mccollect(self$jobs$pid, timeout = 1), onexit = FALSE)
    },

    # Forks a process for the job collection as soon as a slot is free.
    # Invisibly returns the process id as string.
    spawn = function(jc) {
      force(jc)

      # poll once per second until fewer than 'ncpus' processes are tracked
      self$collect(0)
      while (nrow(self$jobs) >= self$ncpus) {
        Sys.sleep(1)
        self$collect(0)
      }

      pid = parallel::mcparallel(doJobCollection(jc, output = jc$log.file), mc.set.seed = FALSE)$pid
      self$jobs = rbind(self$jobs, data.table(pid = pid, count = 0L))
      invisible(as.character(pid))
    },

    # Process ids (as strings) of all processes still tracked.
    list = function() {
      self$collect(0)
      as.character(self$jobs$pid)
    },

    # Collects children until nothing is reported anymore; each reported
    # process gets its counter incremented and is then dropped from the table.
    collect = function(timeout) {
      while (!is.null(res <- mccollect(self$jobs$pid, timeout = timeout))) {
        finished = as.integer(names(res))
        self$jobs[pid %in% finished, count := count + 1L]
        self$jobs = self$jobs[count < 1L]
      }
    }
  )
)

#' @title ClusterFunctions for Parallel Multicore Execution
#'
#' @description
#' Jobs are spawned asynchronously using the functions \code{mcparallel} and \code{mccollect} (both in \pkg{parallel}).
#' Does not work on Windows, use \code{\link{makeClusterFunctionsSocket}} instead.
#'
#' @template ncpus
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsMulticore = function(ncpus = NA_integer_, fs.latency = 0) {
  if (testOS("windows"))
    stop("ClusterFunctionsMulticore do not support Windows. Use makeClusterFunctionsSocket instead.")

  if (is.na(ncpus)) {
    # NOTE(review): this takes the maximum of option 'mc.cores' and the number
    # of detected cores, so a smaller 'mc.cores' does not act as a limit -- confirm
    ncpus = max(as.numeric(getOption("mc.cores")), parallel::detectCores(), 1L, na.rm = TRUE)
    info("Auto-detected %i CPUs", ncpus)
  }
  ncpus = asCount(ncpus, na.ok = FALSE, positive = TRUE)
  pool = Multicore$new(ncpus)

  # The batch id is the id of the forked process.
  submitJob = function(reg, jc) {
    force(jc)
    makeSubmitJobResult(status = 0L, batch.id = pool$spawn(jc))
  }

  listJobsRunning = function(reg) {
    assertRegistry(reg, writeable = FALSE)
    pool$list()
  }

  # collect finished children before the registry is synchronized
  sync.hooks = list(pre.sync = function(reg, fns) pool$collect(1))

  makeClusterFunctions(name = "Multicore", submitJob = submitJob, listJobsRunning = listJobsRunning,
    store.job.collection = FALSE, fs.latency = fs.latency, hooks = sync.hooks)
}

================================================
FILE: R/clusterFunctionsOpenLava.R
================================================
#' @title ClusterFunctions for OpenLava
#'
#' @description
#' Cluster functions for OpenLava.
#'
#' Job files are created based on the brew template \code{template}. This
#' file is processed with brew and then submitted to the queue using the
#' \code{bsub} command. Jobs are killed using the \code{bkill} command and the
#' list of running jobs is retrieved using \code{bjobs -u $USER -w}. The user
#' must have the appropriate privileges to submit, delete and list jobs on the
#' cluster (this is usually the case).
#'
#' The template file can access all resources passed to \code{\link{submitJobs}}
#' as well as all variables stored in the \code{\link{JobCollection}}.
#' It is the template file's job to choose a queue for the job and handle the desired resource
#' allocations.
#'
#' @note
#' Array jobs are currently not supported.
#'
#' @template template
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsOpenLava = function(template = "openlava", scheduler.latency = 1, fs.latency = 65) { # nocov start
  # keep the user-supplied value in 'template' so that the error message can
  # report it; findTemplateFile() returns NA if nothing was found
  template.file = findTemplateFile(template)
  if (testScalarNA(template.file))
    stopf("Argument 'template' (=\"%s\") must point to a readable template file", template)
  template = cfReadBrewTemplate(template.file)

  # When LSB_BJOBS_CONSISTENT_EXIT_CODE = Y, the bjobs command exits with 0 only
  # when unfinished jobs are found, and 255 when no jobs are found,
  # or a non-existent job ID is entered.
  Sys.setenv(LSB_BJOBS_CONSISTENT_EXIT_CODE = "Y")

  # Brews the job file and pipes it to 'bsub'.
  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")
    outfile = cfBrewTemplate(reg, template, jc)
    res = runOSCommand("bsub", stdin = shQuote(outfile))

    if (res$exit.code > 0L) {
      cfHandleUnknownSubmitError("bsub", res$exit.code, res$output)
    } else {
      # the batch id is the first number in the output of bsub
      batch.id = stri_extract_first_regex(stri_flatten(res$output, " "), "\\d+")
      makeSubmitJobResult(status = 0L, batch.id = batch.id)
    }
  }

  # Calls 'bjobs' with the given arguments and extracts the job ids.
  listJobs = function(reg, args) {
    assertRegistry(reg, writeable = FALSE)
    res = runOSCommand("bjobs", args)
    if (res$exit.code > 0L) {
      # exit code 255 or a "No ... job found" message: nothing to list
      if (res$exit.code == 255L || any(stri_detect_regex(res$output, "No (unfinished|pending|running) job found")))
        return(character(0L))
      OSError("Listing of jobs failed", res)
    }
    # skip the first output line, then take the first number of each remaining line
    stri_extract_first_regex(tail(res$output, -1L), "\\d+")
  }

  listJobsQueued = function(reg) {
    listJobs(reg, c("-u $USER", "-w", "-p"))
  }

  listJobsRunning = function(reg) {
    listJobs(reg, c("-u $USER", "-w", "-r"))
  }

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    cfKillJob(reg, "bkill", batch.id)
  }

  makeClusterFunctions(name = "OpenLava", submitJob = submitJob, killJob = killJob, listJobsQueued = listJobsQueued,
    listJobsRunning = listJobsRunning, store.job.collection = TRUE, scheduler.latency = scheduler.latency, fs.latency = fs.latency)
} # nocov end

================================================
FILE: R/clusterFunctionsSGE.R
================================================
#' @title ClusterFunctions for SGE Systems
#'
#' @description
#' Cluster functions for Univa Grid Engine / Oracle Grid Engine /
#' Sun Grid Engine (\url{https://altair.com/hpc-cloud-applications/}).
#'
#' Job files are created based on the brew template \code{template}. This
#' file is processed with brew and then submitted to the queue using the
#' \code{qsub} command. Jobs are killed using the \code{qdel} command and the
#' list of running jobs is retrieved using \code{qstat}. The user must have
#' the appropriate privileges to submit, delete and list jobs on the cluster
#' (this is usually the case).
#'
#' The template file can access all resources passed to \code{\link{submitJobs}}
#' as well as all variables stored in the \code{\link{JobCollection}}.
#' It is the template file's job to choose a queue for the job and handle the desired resource
#' allocations.
#'
#' @note
#' Array jobs are currently not supported.
#'
#' @template template
#' @inheritParams makeClusterFunctions
#' @template nodename
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsSGE = function(template = "sge", nodename = "localhost", scheduler.latency = 1, fs.latency = 65) { # nocov start
  assertString(nodename)
  # keep the user-supplied value in 'template' so that the error message can
  # report it; findTemplateFile() returns NA if nothing was found
  template.file = findTemplateFile(template)
  if (testScalarNA(template.file))
    stopf("Argument 'template' (=\"%s\") must point to a readable template file", template)
  template = cfReadBrewTemplate(template.file)

  # Brews the job file and passes it to 'qsub' on 'nodename'.
  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    outfile = cfBrewTemplate(reg, template, jc)
    res = runOSCommand("qsub", shQuote(outfile), nodename = nodename)

    if (res$exit.code > 0L) {
      cfHandleUnknownSubmitError("qsub", res$exit.code, res$output)
    } else {
      # the batch id is the first number in the output of qsub
      batch.id = stri_extract_first_regex(stri_flatten(res$output, " "), "\\d+")
      makeSubmitJobResult(status = 0L, batch.id = batch.id)
    }
  }

  # Calls 'qstat' with the given arguments and extracts the job ids.
  listJobs = function(reg, args) {
    assertRegistry(reg, writeable = FALSE)
    res = runOSCommand("qstat", args, nodename = nodename)
    if (res$exit.code > 0L)
      OSError("Listing of jobs failed", res)
    # skip the first two output lines, then take the first number of each remaining line
    stri_extract_first_regex(tail(res$output, -2L), "\\d+")
  }

  listJobsQueued = function(reg) {
    listJobs(reg, c("-u $USER", "-s p"))
  }

  listJobsRunning = function(reg) {
    listJobs(reg, c("-u $USER", "-s rs"))
  }

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    cfKillJob(reg, "qdel", batch.id, nodename = nodename)
  }

  makeClusterFunctions(name = "SGE", submitJob = submitJob, killJob = killJob, listJobsQueued = listJobsQueued,
    listJobsRunning = listJobsRunning, store.job.collection = TRUE, scheduler.latency = scheduler.latency, fs.latency = fs.latency)
} # nocov end

================================================
FILE: R/clusterFunctionsSSH.R
================================================
#' @title ClusterFunctions for Remote SSH Execution
#'
#' @description
#' Jobs are spawned by starting multiple R sessions via \code{Rscript} over SSH.
#' If the hostname of the \code{\link{Worker}} equals \dQuote{localhost},
#' \code{Rscript} is called directly so that you do not need to have an SSH client installed.
#'
#' @param workers [\code{list} of \code{\link{Worker}}]\cr
#'   List of Workers as constructed with \code{\link{Worker}}.
#' @inheritParams makeClusterFunctions
#'
#' @note
#' If you use a custom \dQuote{.ssh/config} file, make sure your
#' ProxyCommand passes \sQuote{-q} to ssh, otherwise each output will
#' end with the message \dQuote{Killed by signal 1} and this will break
#' the communication with the nodes.
#'
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
#' @examples
#' \dontrun{
#' # cluster functions for multicore execution on the local machine
#' makeClusterFunctionsSSH(list(Worker$new("localhost", ncpus = 2)))
#' }
makeClusterFunctionsSSH = function(workers, fs.latency = 65) { # nocov start
  assertList(workers, types = "Worker")
  # index the workers by host name; killJob relies on this lookup
  names(workers) = vcapply(workers, "[[", "nodename")
  if (anyDuplicated(names(workers)))
    stop("Duplicated hostnames found in list of workers")

  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    # refresh every worker before choosing one
    for (w in workers)
      w$update(reg)

    # shuffle the workers with weights inversely related to max.load / ncpus,
    # then take the first one reporting itself as available
    weights = 1 / (vnapply(workers, function(w) w$max.load / w$ncpus) + 0.1)
    shuffled = sample(workers, prob = weights)
    worker = Find(function(w) w$status == "available", shuffled, nomatch = NULL)

    if (is.null(worker) || worker$status != "available")
      return(makeSubmitJobResult(status = 1L, batch.id = NA_character_, msg = sprintf("Busy: %s", workers[[1L]]$status)))

    pid = try(worker$start(reg, jc$uri, jc$log.file))
    if (is.error(pid))
      return(makeSubmitJobResult(status = 101L, batch.id = NA_character_, msg = "Submit failed."))

    # batch ids have the form "<nodename>#<output of worker$start>"
    makeSubmitJobResult(status = 0L, batch.id = sprintf("%s#%s", worker$nodename, pid$output))
  }

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    # the part before the first "#" names the worker that runs the job
    host = stri_split_fixed(batch.id, "#", n = 2L)[[1L]][1L]
    workers[[host]]$kill(reg, batch.id)
  }

  listJobsRunning = function(reg) {
    assertRegistry(reg, writeable = FALSE)
    per.worker = lapply(workers, function(w) w$list(reg))
    unlist(per.worker, use.names = FALSE)
  }

  makeClusterFunctions(name = "SSH", submitJob = submitJob, killJob = killJob, listJobsRunning = listJobsRunning,
    store.job.collection = TRUE, fs.latency = fs.latency)
} # nocov end


================================================
FILE: R/clusterFunctionsSlurm.R
================================================
#' @title ClusterFunctions for Slurm Systems
#'
#' @description
#' Cluster functions for Slurm (\url{https://slurm.schedmd.com/}).
#'
#' Job files are created based on the brew template \code{template}. This
#' file is processed with brew and then submitted to the queue using the
#' \code{sbatch} command. Jobs are killed using the \code{scancel} command and
#' the list of running jobs is retrieved using \code{squeue}. The user must
#' have the appropriate privileges to submit, delete and list jobs on the
#' cluster (this is usually the case).
#'
#' The template file can access all resources passed to \code{\link{submitJobs}}
#' as well as all variables stored in the \code{\link{JobCollection}}.
#' It is the template file's job to choose a queue for the job and handle the desired resource
#' allocations.
#'
#' Note that you might have to specify the cluster name here if you do not want to use the default,
#' otherwise the commands for listing and killing jobs will not work.
#'
#' @template template
#' @param array.jobs [\code{logical(1)}]\cr
#'   If array jobs are disabled on the computing site, set to \code{FALSE}.
#' @template nodename
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsSlurm = function(template = "slurm", array.jobs = TRUE, nodename = "localhost", scheduler.latency = 1, fs.latency = 65) { # nocov start
  assertFlag(array.jobs)
  assertString(nodename)

  # Keep the lookup result under its own name: on failure it is NA, and the
  # error message must report what the caller passed, not the NA.
  template.file = findTemplateFile(template)
  if (testScalarNA(template.file))
    stopf("Argument 'template' (=\"%s\") must point to a readable template file", template)
  template = cfReadBrewTemplate(template.file, "##")
  # arguments containing shell variables are only shell-quoted for remote nodes
  quote = if (isLocalHost(nodename)) identity else shQuote

  # Comma-separated list of all "cluster" resources stored in the registry,
  # or character(0) if no job resource set names a cluster.
  getClusters = function(reg) {
    clusters = filterNull(lapply(reg$resources$resources, "[[", "cluster"))
    if (length(clusters))
      return(stri_flatten(unique(as.character(clusters)), ","))
    return(character(0L))
  }

  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    if (jc$array.jobs) {
      # Refuse before anything is handed to sbatch; checking after submission
      # would leave a job on the cluster that the registry never learns about.
      if (!array.jobs)
        stop("Array jobs not supported by cluster function")
      logs = sprintf("%s_%i", fs::path_file(jc$log.file), seq_row(jc$jobs))
      jc$log.file = stri_join(jc$log.file, "_%a")
    }
    outfile = cfBrewTemplate(reg, template, jc)
    res = runOSCommand("sbatch", shQuote(outfile), nodename = nodename)
    output = stri_flatten(stri_trim_both(res$output), "\n")

    if (res$exit.code > 0L) {
      # known messages get their index as a positive status code
      # instead of going through the unknown-error handler
      temp.errors = c(
        "Batch job submission failed: Job violates accounting policy (job submit limit, user's size and/or time limits)",
        "Socket timed out on send/recv operation",
        "Submission rate too high, suggest using job arrays"
      )
      i = wf(stri_detect_fixed(output, temp.errors))
      if (length(i) == 1L)
        return(makeSubmitJobResult(status = i, batch.id = NA_character_, msg = temp.errors[i]))
      return(cfHandleUnknownSubmitError("sbatch", res$exit.code, res$output))
    }

    # the job id is the fourth space-separated token of sbatch's output
    id = stri_split_fixed(output[1L], " ")[[1L]][4L]
    if (jc$array.jobs) {
      makeSubmitJobResult(status = 0L, batch.id = sprintf("%s_%i", id, seq_row(jc$jobs)), log.file = logs)
    } else {
      makeSubmitJobResult(status = 0L, batch.id = id)
    }
  }

  listJobs = function(reg, args) {
    assertRegistry(reg, writeable = FALSE)
    args = c(args, "--noheader", "--format=%i")
    if (array.jobs)
      args = c(args, "-r")
    clusters = getClusters(reg)
    if (length(clusters))
      args = c(args, sprintf("--clusters=%s", clusters))
    res = runOSCommand("squeue", args, nodename = nodename)
    if (res$exit.code > 0L)
      OSError("Listing of jobs failed", res)
    # with --clusters the first output line is dropped
    # (presumably a cluster banner that --noheader does not suppress)
    if (length(clusters)) tail(res$output, -1L) else res$output
  }

  # Full List of Slurm job state codes:
  # https://slurm.schedmd.com/squeue.html
  # BF,CA,CD,CF,CG,DL,F,NF,OOM,PD,PR,R,RD,RF,RH,RS,RV,SI,SE,SO,ST,S,TO
  # Querying by RD (RESV_DEL_HOLD) status throwing error on slurm v20.11.4
  listJobsQueued = function(reg) {
    args = c(quote("--user=$USER"), "--states=PD,CF,RF,RH,RQ,SE")
    listJobs(reg, args)
  }

  listJobsRunning = function(reg) {
    args = c(quote("--user=$USER"), "--states=R,S,CG,RS,SI,SO,ST")
    listJobs(reg, args)
  }
  # Slurm job state codes that will result in an expired status:
  # BF,CA,CD,DL,F,NF,OOM,PR,RV,TO,RD

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    cfKillJob(reg, "scancel", c(sprintf("--clusters=%s", getClusters(reg)), batch.id), nodename = nodename)
  }

  makeClusterFunctions(name = "Slurm", submitJob = submitJob, killJob = killJob, listJobsRunning = listJobsRunning,
    listJobsQueued = listJobsQueued, array.var = "SLURM_ARRAY_TASK_ID", store.job.collection = TRUE,
    store.job.files = !isLocalHost(nodename), scheduler.latency = scheduler.latency, fs.latency = fs.latency)
} # nocov end


================================================
FILE: R/clusterFunctionsSocket.R
================================================
# Wrapper around a snow socket cluster on localhost.
# 'pids' has one slot per cluster node holding the hash of the job collection
# running there; an empty string marks a free node.
Socket = R6Class("Socket",
  cloneable = FALSE,
  public = list(
    cl = NULL,
    pids = NULL,

    initialize = function(ncpus) {
      loadNamespace("snow")
      self$cl = snow::makeSOCKcluster(rep.int("localhost", ncpus))
      self$pids = character(ncpus)
      # stop the cluster when the object is finalized or the session exits
      reg.finalizer(self, function(e) if (!is.null(e$cl)) { snow::stopCluster(e$cl); self$cl = NULL }, onexit = TRUE)
    },

    spawn = function(jc, ...) {
      force(jc)
      # all nodes busy: wait for one result and free the matching slot
      if (all(nzchar(self$pids))) {
        res = snow::recvOneResult(self$cl)
        self$pids[self$pids == res$tag] = ""
      }
      i = wf(!nzchar(self$pids))
      snow::sendCall(self$cl[[i]], doJobCollection, list(jc = jc, output = jc$log.file), return = FALSE, tag = jc$job.hash)
      self$pids[i] = jc$job.hash
      invisible(jc$job.hash)
    },

    list = function() {
      if (is.null(self$cl))
        return(character(0L))

      # collect one result per connection that has data waiting, then report
      # the hashes still occupying a slot
      sl = lapply(self$cl, function(x) x$con)
      finished = which(socketSelect(sl, write = FALSE, timeout = 1))
      for (i in seq_along(finished)) {
        res = snow::recvOneResult(self$cl)
        self$pids[self$pids == res$tag] = ""
      }

      self$pids[nzchar(self$pids)]
    }
  )
)

#' @title ClusterFunctions for Parallel Socket Execution
#'
#' @description
#' Jobs are spawned asynchronously using the package \pkg{snow}.
#'
#' @template ncpus
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsSocket = function(ncpus = NA_integer_, fs.latency = 65) {
  assertCount(ncpus, positive = TRUE, na.ok = TRUE)
  if (is.na(ncpus)) {
    ncpus = max(getOption("mc.cores", parallel::detectCores()), 1L, na.rm = TRUE)
    info("Auto-detected %i CPUs", ncpus)
  }
  p = Socket$new(ncpus)

  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")
    p$spawn(jc)
    makeSubmitJobResult(status = 0L, batch.id = jc$job.hash)
  }

  listJobsRunning = function(reg) {
    assertRegistry(reg, writeable = FALSE)
    p$list()
  }

  # the pre.sync hook polls the cluster so that finished jobs free their slots
  makeClusterFunctions(name = "Socket", submitJob = submitJob, listJobsRunning = listJobsRunning,
    store.job.collection = FALSE, fs.latency = fs.latency, hooks = list(pre.sync = function(reg, fns) p$list()))
}


================================================
FILE: R/clusterFunctionsTORQUE.R
================================================
#' @title ClusterFunctions for OpenPBS/TORQUE Systems
#'
#' @description
#' Cluster functions for TORQUE/PBS (\url{https://adaptivecomputing.com/cherry-services/torque-resource-manager/}).
#'
#' Job files are created based on the brew template \code{template}. This file is processed
#' with brew and then submitted to the queue using the \code{qsub} command. Jobs are killed using
#' the \code{qdel} command and the list of running jobs is retrieved using \code{qselect}. The user
#' must have the appropriate privileges to submit, delete and list jobs on the cluster (this is
#' usually the case).
#'
#' The template file can access all resources passed to \code{\link{submitJobs}}
#' as well as all variables stored in the \code{\link{JobCollection}}.
#' It is the template file's job to choose a queue for the job and handle the desired resource
#' allocations.
#'
#' @template template
#' @inheritParams makeClusterFunctions
#' @return [\code{\link{ClusterFunctions}}].
#' @family ClusterFunctions
#' @export
makeClusterFunctionsTORQUE = function(template = "torque", scheduler.latency = 1, fs.latency = 65) { # nocov start
  # Keep the lookup result under its own name: on failure it is NA, and the
  # error message must report what the caller passed, not the NA.
  template.file = findTemplateFile(template)
  if (testScalarNA(template.file))
    stopf("Argument 'template' (=\"%s\") must point to a readable template", template)
  template = cfReadBrewTemplate(template.file, "##")

  submitJob = function(reg, jc) {
    assertRegistry(reg, writeable = TRUE)
    assertClass(jc, "JobCollection")

    outfile = cfBrewTemplate(reg, template, jc)
    res = runOSCommand("qsub", shQuote(outfile))

    output = stri_flatten(stri_trim_both(res$output), "\n")
    if (res$exit.code > 0L) {
      # a full queue is reported with status 1 rather than as an unknown error
      max.jobs.msg = "Maximum number of jobs already in queue"
      if (stri_detect_fixed(output, max.jobs.msg) || res$exit.code == 228L)
        return(makeSubmitJobResult(status = 1L, batch.id = NA_character_, msg = max.jobs.msg))
      return(cfHandleUnknownSubmitError("qsub", res$exit.code, res$output))
    }

    if (jc$array.jobs) {
      # one batch id per array element: the first "[]" in qsub's output becomes "[<index>]"
      logs = sprintf("%s-%i", fs::path_file(jc$log.file), seq_row(jc$jobs))
      makeSubmitJobResult(status = 0L, batch.id = stri_replace_first_fixed(output, "[]", stri_paste("[", seq_row(jc$jobs), "]")), log.file = logs)
    } else {
      makeSubmitJobResult(status = 0L, batch.id = output)
    }
  }

  killJob = function(reg, batch.id) {
    assertRegistry(reg, writeable = TRUE)
    assertString(batch.id)
    cfKillJob(reg, "qdel", batch.id)
  }

  listJobs = function(reg, args) {
    assertRegistry(reg, writeable = FALSE)
    res = runOSCommand("qselect", args)
    if (res$exit.code > 0L)
      OSError("Listing of jobs failed", res)
    res$output
  }

  listJobsQueued = function(reg) {
    args = c("-u $USER", "-s QW")
    listJobs(reg, args)
  }

  listJobsRunning = function(reg) {
    args = c("-u $USER", "-s EHRT")
    listJobs(reg, args)
  }

  makeClusterFunctions(name = "TORQUE", submitJob = submitJob, killJob = killJob, listJobsQueued = listJobsQueued,
    listJobsRunning = listJobsRunning, array.var = "PBS_ARRAYID", store.job.collection = TRUE,
    scheduler.latency = scheduler.latency, fs.latency = fs.latency)
} # nocov end


================================================
FILE: R/config.R
================================================
#' @title Find a batchtools Configuration File
#'
#' @description
#' This function returns the path to the first configuration file found in the following locations:
#' \enumerate{
#'  \item{File \dQuote{batchtools.conf.R} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}.}
#'  \item{File \dQuote{batchtools.conf.R} in the current working directory.}
#'  \item{File \dQuote{config.R} in the user configuration directory as reported by \code{rappdirs::user_config_dir("batchtools", expand = FALSE)} (depending on OS, e.g., on linux this usually resolves to \dQuote{~/.config/batchtools/config.R}).}
#'  \item{\dQuote{.batchtools.conf.R} in the home directory (\dQuote{~}).}
#'  \item{\dQuote{config.R} in the site config directory as reported by \code{rappdirs::site_config_dir("batchtools")} (depending on OS). This file can be used for admins to set sane defaults for a computation site.}
#' }
#' @return [\code{character(1)}] Path to the configuration file or \code{NA} if no configuration file was found.
#' @keywords internal
#' @export
findConfFile = function() {
  # Candidates are probed in priority order; the first readable one wins.
  # Some locations are returned as absolute paths and others as constructed,
  # exactly as listed below.
  readable = function(path) fs::file_access(path, "read")

  search.path = Sys.getenv("R_BATCHTOOLS_SEARCH_PATH")
  if (nzchar(search.path)) {
    candidate = fs::path(search.path, "batchtools.conf.R")
    if (readable(candidate))
      return(fs::path_abs(candidate))
  }

  candidate = "batchtools.conf.R"
  if (readable(candidate))
    return(fs::path_abs(candidate))

  candidate = fs::path(user_config_dir("batchtools", expand = FALSE), "config.R")
  if (readable(candidate))
    return(candidate)

  candidate = fs::path("~", ".batchtools.conf.R")
  if (readable(candidate))
    return(fs::path_abs(candidate))

  candidate = fs::path(site_config_dir("batchtools"), "config.R")
  if (readable(candidate))
    return(candidate)

  NA_character_
}

# Installs the default system configuration on the registry and, if a
# configuration file is given, sources it into the registry so that it can
# override the defaults.
setSystemConf = function(reg, conf.file) {
  reg$cluster.functions = makeClusterFunctionsInteractive()
  reg$default.resources = list()
  reg$temp.dir = fs::path_temp()
  reg$compress = "gzip"

  if (is.na(conf.file)) {
    info("No readable configuration file found")
  } else {
    assertString(conf.file)
    info("Sourcing configuration file '%s' ...", conf.file)
    sys.source(conf.file, envir = reg, keep.source = FALSE)

    # validate whatever the configuration file may have overwritten
    assertClass(reg$cluster.functions, "ClusterFunctions")
    assertList(reg$default.resources, names = "unique")
    fs::dir_create(reg$temp.dir)
  }
}


================================================
FILE: R/doJobCollection.R
================================================
#' @title Execute Jobs of a JobCollection
#'
#' @description
#' Executes every job in a \code{\link{JobCollection}}.
#' This function is intended to be called on the slave.
#'
#' @param jc [\code{\link{JobCollection}}]\cr
#'   Either an object of class \dQuote{JobCollection} as returned by
#'   \code{\link{makeJobCollection}} or a string with the path to file
#'   containing a \dQuote{JobCollection} as RDS file (as stored by \code{\link{submitJobs}}).
#' @param output [\code{character(1)}]\cr
#'   Path to a file to write the output to. Defaults to \code{NULL} which means
#'   that output is written to the active \code{\link[base]{sink}}.
#'   Do not set this if your scheduler redirects output to a log file.
#' @return [\code{character(1)}]: Hash of the \code{\link{JobCollection}} executed.
#' @family JobCollection
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(identity, 1:2, reg = tmp)
#' jc = makeJobCollection(1:2, reg = tmp)
#' doJobCollection(jc)
doJobCollection = function(jc, output = NULL) {
  UseMethod("doJobCollection")
}


#' @export
doJobCollection.character = function(jc, output = NULL) {
  obj = readRDS(jc)
  force(obj)
  # The stored collection file (and a sibling ".job" file, if any) is removed
  # once loaded, unless debugging or running as array job.
  if (!batchtools$debug && !obj$array.jobs) {
    fs::file_delete(jc)
    job = fs::path_ext_set(jc, "job")
    if (fs::file_exists(job))
      fs::file_delete(job)
  }
  doJobCollection.JobCollection(obj, output = output)
}


#' @export
doJobCollection.JobCollection = function(jc, output = NULL) {
  # Marks every job of the collection as started and finished "now" with the
  # given (sprintf-formatted, truncated) error message and writes that as an
  # update file; used for failures which happen before any job is executed.
  error = function(msg, ...) {
    now = ustamp()
    updates = data.table(job.id = jc$jobs$job.id, started = now, done = now,
      error = stri_trunc(stri_trim_both(sprintf(msg, ...)), 500L, " [truncated]"),
      mem.used = NA_real_, key = "job.id")
    writeRDS(updates, file = fs::path(jc$file.dir, "updates", sprintf("%s.rds", jc$job.hash)), compress = jc$compress)
    invisible(NULL)
  }

  # signal warnings immediately
  opts = options("warn")
  options(warn = 1L)
  on.exit(options(opts))

  # setup output connection
  if (!is.null(output)) {
    if (!testPathForOutput(output, overwrite = TRUE))
      return(error("Cannot create output file for logging"))
    fp = file(output, open = "wt")
    sink(file = fp)
    sink(file = fp, type = "message")
    on.exit({ sink(type = "message"); sink(type = "output"); close(fp) }, add = TRUE)
  }

  # subset array jobs
  if (jc$array.jobs) {
    i = as.integer(Sys.getenv(jc$array.var))
    if (!testInteger(i, any.missing = FALSE, lower = 1L, upper = nrow(jc$jobs)))
      return(error("Failed to subset JobCollection using array environment variable '%s' [='%s']", jc$array.var, i))
    jc$jobs = jc$jobs[i]
  }

  # say hi
  n.jobs = nrow(jc$jobs)
  s = now()
  catf("### [bt%s]: This is batchtools v%s", s, packageVersion("batchtools"))
  catf("### [bt%s]: Starting calculation of %i jobs", s, n.jobs)
  catf("### [bt%s]: Setting working directory to '%s'", s, jc$work.dir)

  # set work dir
  if (!fs::dir_exists(jc$work.dir))
    return(error("Work dir does not exist"))
  local_dir(jc$work.dir)

  # load registry dependencies: packages, source files, ...
  # note that this should happen _before_ parallelMap or foreach is initialized
  ok = try(loadRegistryDependencies(jc, must.work = TRUE), silent = TRUE)
  if (is.error(ok))
    return(error("Error loading registry dependencies: %s", as.character(ok)))

  # setup inner parallelization with parallelMap
  if (hasName(jc$resources, "pm.backend")) {
    if (!requireNamespace("parallelMap", quietly = TRUE))
      return(error("parallelMap not installed"))
    pm.opts = filterNull(insert(list(mode = jc$resources$pm.backend, cpus = jc$resources$ncpus, show.info = FALSE), jc$resources$pm.opts))
    do.call(parallelMap::parallelStart, pm.opts)
    on.exit(parallelMap::parallelStop(), add = TRUE)
    pm.opts = parallelMap::parallelGetOptions()$settings
    catf("### [bt%s]: Using %i CPUs for parallelMap/%s on level '%s'", s, pm.opts$cpus, pm.opts$mode, if (is.na(pm.opts$level)) "default" else pm.opts$level)
  }

  # setup inner parallelization with foreach
  if (hasName(jc$resources, "foreach.backend")) {
    if (!requireNamespace("foreach", quietly = TRUE))
      return(error("Package 'foreach' is not installed"))
    backend = jc$resources$foreach.backend
    ncpus = jc$resources$ncpus

    if (backend == "seq") {
      foreach::registerDoSEQ()
    } else if (backend == "parallel") {
      if (!requireNamespace("doParallel", quietly = TRUE))
        return(error("Package 'doParallel' is not installed"))
      doParallel::registerDoParallel(cores = ncpus)
    } else if (backend == "mpi") {
      if (!requireNamespace("doMPI", quietly = TRUE))
        return(error("Package 'doMPI' is not installed"))
      cl = doMPI::startMPIcluster(count = ncpus)
      doMPI::registerDoMPI(cl)
      on.exit(doMPI::closeCluster(cl), add = TRUE)
    } else {
      return(error("Unknown foreach backend: '%s'", backend))
    }
    catf("### [bt%s]: Using %i CPUs for foreach/%s", s, ncpus, backend)
  }

  # setup memory measurement
  measure.memory = isTRUE(jc$resources$measure.memory)
  catf("### [bt%s]: Memory measurement %s", s, ifelse(measure.memory, "enabled", "disabled"))
  if (measure.memory) {
    # multipliers for the first column of gc(): presumably bytes per Ncell
    # (28 with 4-byte pointers, 56 otherwise) and per Vcell (8) -- see ?gc
    memory.mult = c(if (.Machine$sizeof.pointer == 4L) 28L else 56L, 8L)
  }

  # try to pre-fetch some objects from the file system
  reader = RDSReader$new(n.jobs > 1L)
  buf = UpdateBuffer$new(jc$jobs$job.id)

  runHook(jc, "pre.do.collection", reader = reader)

  for (i in seq_len(n.jobs)) {
    job = getJob(jc, i, reader = reader)
    id = job$id

    update = list(started = ustamp(), done = NA_integer_, error = NA_character_, mem.used = NA_real_)
    catf("### [bt%s]: Starting job [batchtools job.id=%i]", now(), id)
    if (measure.memory) {
      gc(reset = TRUE)
      result = try(execJob(job))
      update$mem.used = sum(gc()[, 1L] * memory.mult) / 1000000L
    } else {
      result = try(execJob(job))
    }
    update$done = ustamp()

    if (is.error(result)) {
      catf("\n### [bt%s]: Job terminated with an exception [batchtools job.id=%i]", now(), id)
      update$error = stri_trunc(stri_trim_both(as.character(result)), 500L, " [truncated]")
    } else {
      catf("\n### [bt%s]: Job terminated successfully [batchtools job.id=%i]", now(), id)
      writeRDS(result, file = getResultFiles(jc, id), compress = jc$compress)
    }
    # record the status; flush() only writes once its randomized interval elapsed
    buf$add(i, update)
    buf$flush(jc)
  }

  runHook(jc, "post.do.collection", updates = buf$updates, reader = reader)
  # write whatever has not been flushed yet
  buf$save(jc)
  catf("### [bt%s]: Calculation finished!", now())

  invisible(jc$job.hash)
}


# Collects status updates of the jobs in a collection and writes them in
# batches to the "updates" directory of the registry. The next write is
# scheduled 60 to 300 seconds (drawn uniformly) after the previous one.
UpdateBuffer = R6Class("UpdateBuffer",
  cloneable = FALSE,
  public = list(
    updates = NULL,
    next.update = NA_real_,

    initialize = function(ids) {
      self$updates = data.table(job.id = ids, started = NA_real_, done = NA_real_, error = NA_character_, mem.used = NA_real_, written = FALSE, key = "job.id")
      self$next.update = Sys.time() + runif(1L, 60, 300)
    },

    # store the update list 'x' in row 'i'
    add = function(i, x) {
      set(self$updates, i, names(x), x)
    },

    # write all started but not yet written rows into a new file which is named
    # after the job hash and the first job id of the batch
    save = function(jc) {
      i = self$updates[!is.na(started) & (!written), which = TRUE]
      if (length(i) > 0L) {
        first.id = self$updates$job.id[i[1L]]
        writeRDS(self$updates[i, !"written"], file = fs::path(jc$file.dir, "updates", sprintf("%s-%i.rds", jc$job.hash, first.id)), compress = jc$compress)
        set(self$updates, i, "written", TRUE)
      }
    },

    # save only if the scheduled time has passed, then reschedule
    flush = function(jc) {
      now = Sys.time()
      if (now > self$next.update) {
        self$save(jc)
        self$next.update = now + runif(1L, 60, 300)
      }
    }
  )
)


================================================
FILE: R/estimateRuntimes.R
================================================
#' @title Estimate Remaining Runtimes
#'
#' @description
#' Estimates the runtimes of jobs using the random forest implemented in \pkg{ranger}.
#' Observed runtimes are retrieved from the \code{\link{Registry}} and runtimes are
#' predicted for unfinished jobs.
#'
#' The estimated remaining time is calculated in the \code{print} method.
#' You may also pass \code{n} here to determine the number of parallel jobs which is then used
#' in a simple Longest Processing Time (LPT) algorithm to give an estimate for the parallel runtime.
#'
#' @param tab [\code{\link[data.table]{data.table}}]\cr
#'   Table with column \dQuote{job.id} and additional columns to predict the runtime.
#'   Observed runtimes will be looked up in the registry and serve as dependent variable.
#'   All columns in \code{tab} except \dQuote{job.id} will be passed to \code{\link[ranger]{ranger}} as
#'   independent variables to fit the model.
#' @param ... [ANY]\cr
#'   Additional parameters passed to \code{\link[ranger]{ranger}}. Ignored for the \code{print} method.
#' @template reg
#' @return [\code{RuntimeEstimate}] which is a \code{list} with two named elements:
#'   \dQuote{runtimes} is a \code{\link[data.table]{data.table}} with columns \dQuote{job.id},
#'   \dQuote{runtime} (in seconds) and \dQuote{type} (\dQuote{estimated} if runtime is estimated,
#'   \dQuote{observed} if runtime was observed).
#'   The other element of the list named \dQuote{model} contains the fitted random forest object.
#' @export
#' @seealso \code{\link{binpack}} and \code{\link{lpt}} to chunk jobs according to their estimated runtimes.
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' # Create a simple toy registry
#' set.seed(1)
#' tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE, seed = 1)
#' addProblem(name = "iris", data = iris, fun = function(data, ...) nrow(data), reg = tmp)
#' addAlgorithm(name = "nrow", function(instance, ...) nrow(instance), reg = tmp)
#' addAlgorithm(name = "ncol", function(instance, ...) ncol(instance), reg = tmp)
#' addExperiments(algo.designs = list(nrow = data.table::CJ(x = 1:50, y = letters[1:5])), reg = tmp)
#' addExperiments(algo.designs = list(ncol = data.table::CJ(x = 1:50, y = letters[1:5])), reg = tmp)
#'
#' # We use the job parameters to predict runtimes
#' tab = unwrap(getJobPars(reg = tmp))
#'
#' # First we need to submit some jobs so that the forest can train on some data.
#' # Thus, we just sample some jobs from the registry while grouping by factor variables.
#' library(data.table)
#' ids = tab[, .SD[sample(nrow(.SD), 5)], by = c("problem", "algorithm", "y")]
#' setkeyv(ids, "job.id")
#' submitJobs(ids, reg = tmp)
#' waitForJobs(reg = tmp)
#'
#' # We "simulate" some more realistic runtimes here to demonstrate the functionality:
#' # - Algorithm "ncol" is 5 times more expensive than "nrow"
#' # - x has no effect on the runtime
#' # - If y is "a" or "b", the runtimes are really high
#' runtime = function(algorithm, x, y) {
#'   ifelse(algorithm == "nrow", 100L, 500L) + 1000L * (y %in% letters[1:2])
#' }
#' tmp$status[ids, done := done + tab[ids, runtime(algorithm, x, y)]]
#' rjoin(sjoin(tab, ids), getJobStatus(ids, reg = tmp)[, c("job.id", "time.running")])
#'
#' # Estimate runtimes:
#' est = estimateRuntimes(tab, reg = tmp)
#' print(est)
#' rjoin(tab, est$runtimes)
#' print(est, n = 10)
#'
#' # Submit jobs with longest runtime first:
#' ids = est$runtimes[type == "estimated"][order(runtime, decreasing = TRUE)]
#' print(ids)
#' \dontrun{
#' submitJobs(ids, reg = tmp)
#' }
#'
#' # Group jobs into chunks with runtime < 1h
#' ids = est$runtimes[type == "estimated"]
#' ids[, chunk := binpack(runtime, 3600)]
#' print(ids)
#' print(ids[, list(runtime = sum(runtime)), by = chunk])
#' \dontrun{
#' submitJobs(ids, reg = tmp)
#' }
#'
#' # Group jobs into 10 chunks with similar runtime
#' ids = est$runtimes[type == "estimated"]
#' ids[, chunk := lpt(runtime, 10)]
#' print(ids[, list(runtime = sum(runtime)), by = chunk])
estimateRuntimes = function(tab, ..., reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  data = copy(convertIds(reg, tab, keep.extra = names(tab)))

  if (!requireNamespace("ranger", quietly = TRUE))
    stop("Please install package 'ranger' for runtime estimation")

  # observed runtimes are the dependent variable; NA marks rows to be predicted
  data[, "runtime" := as.numeric(getJobStatus(tab, reg)$time.running)]
  i = is.na(data$runtime)
  if (all(i))
    stop("No training data available. Some jobs must be finished before estimating runtimes.")

  # fit on the observed rows (without the id) and fill in the missing runtimes
  rf = ranger::ranger(runtime ~ ., data = data[!i, !"job.id"], ...)
  data[i, "runtime" := predict(rf, .SD)$predictions, .SDcols = chsetdiff(names(data), c("job.id", "runtime"))]
  data$type = factor(ifelse(i, "estimated", "observed"), levels = c("observed", "estimated"))
  setClasses(list(runtimes = data[, c("job.id", "type", "runtime")], model = rf), c("RuntimeEstimate", class(data)))
}


#' @rdname estimateRuntimes
#' @param x [\code{RuntimeEstimate}]\cr
#'   Object to print.
#' @param n [\code{integer(1)}]\cr
#'   Number of parallel jobs to assume for runtime estimation.
#' @export
print.RuntimeEstimate = function(x, n = 1L, ...) {
  # format seconds as days, hours, minutes and seconds; 'nc' is the minimum
  # (zero-padded) width of the days field
  ps = function(x, nc = 2L) {
    sprintf(paste0("%0", nc, "id %02ih %02im %.1fs"),
      floor(x / 86400),
      floor((x / 3600) %% 24L),
      floor((x / 60) %% 60L),
      x %% 60L
    )
  }

  assertCount(n, positive = TRUE)
  runtime = type = NULL
  calculated = x$runtimes[type == "observed", sum(runtime)]
  remaining = x$runtimes[type == "estimated", sum(runtime)]
  total = calculated + remaining
  # width of the days field so that all printed durations align
  nc = max(1L, nchar(total %/% 86400))

  catf("Runtime Estimate for %i jobs with %i CPUs", nrow(x$runtimes), n)
  catf(" Done : %s", ps(calculated, nc = nc))
  if (x$runtimes[type == "estimated", .N] > 0L) {
    catf(" Remaining: %s", ps(remaining, nc = nc))
    if (n >= 2L) {
      # distribute the estimated runtimes on n bins via lpt(); the fullest bin
      # determines the parallel runtime
      rt = x$runtimes[type == "estimated"]$runtime
      bins = lpt(rt, n)
      bins = vnapply(split(rt, bins), sum)
      catf(" Parallel : %s", ps(max(bins), nc = nc))
    }
  }
  catf(" Total : %s", ps(total, nc = nc))

  # print methods return their argument invisibly
  invisible(x)
}


================================================
FILE: R/execJob.R
================================================
#' @title Execute a Single Job
#'
#' @description
#' Executes a single job (as created by \code{\link{makeJob}}) and returns
#' its result. Also works for Experiments.
#'
#' @param job [\code{\link{Job}} | \code{\link{Experiment}}]\cr
#'   Job/Experiment to execute.
#' @return Result of the job.
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(identity, 1:2, reg = tmp)
#' job = makeJob(1, reg = tmp)
#' execJob(job)
execJob = function(job) {
  UseMethod("execJob")
}

#' @export
execJob.character = function(job) {
  execJob(readRDS(job))
}

#' @export
execJob.JobCollection = function(job) {
  if (nrow(job$jobs) != 1L)
    stop("You must provide a JobCollection with exactly one job")
  execJob(getJob(job, i = 1L))
}

#' @export
execJob.Job = function(job) {
  # print a traceback on error; the previous handler is restored on exit
  opts = options(error = function(e) traceback(2L))
  on.exit(options(opts))

  # this needs to be cat, message outputs to stderr which R cannot capture properly
  catf("### [bt%s]: Setting seed to %i ...", now(), job$seed)

  # the job object itself is only passed if the function declares a '.job' argument
  args = job$pars
  if (".job" %chin% names(formals(job$fun)))
    args = c(args, list(.job = job))
  with_seed(job$seed, do.call(job$fun, args, envir = .GlobalEnv))
}

#' @export
execJob.Experiment = function(job) {
  # print a traceback on error; the previous handler is restored on exit
  opts = options(error = function(e) traceback(2L))
  on.exit(options(opts))

  # this needs to be cat, message outputs to stderr which R cannot capture properly
  catf("### [bt%s]: Generating problem instance for problem '%s' ...", now(), job$prob.name)
  instance = job$instance
  force(instance)
  job$allow.access.to.instance = FALSE

  wrapper = function(...) {
    job$algorithm$fun(job = job, data = job$problem$data, instance = instance, ...)
  }

  # this needs to be cat, message outputs to stderr which R cannot capture properly
  catf("### [bt%s]: Applying algorithm '%s' on problem '%s' for job %i (seed = %i) ...", now(), job$algo.name, job$prob.name, job$id, job$seed)
  with_seed(job$seed, do.call(wrapper, job$algo.pars, envir = .GlobalEnv))
}


================================================
FILE: R/files.R
================================================
# Path to the sub directory 'what' below the (expanded) file directory.
dir = function(reg, what) {
  root = fs::path_expand(reg$file.dir)
  fs::path(root, what)
}

# Result files are named "<job.id>.rds"; 'ids' is either an atomic vector of
# ids or a table with a column 'job.id'.
getResultFiles = function(reg, ids) {
  job.ids = if (is.atomic(ids)) ids else ids$job.id
  fs::path(dir(reg, "results"), sprintf("%i.rds", job.ids))
}

# Log files default to "<job.hash>.log" if no log file is stored for a
# submitted job. Jobs without log file and without hash yield NA.
getLogFiles = function(reg, ids) {
  job.hash = log.file = NULL
  tab = reg$status[list(ids), c("job.id", "job.hash", "log.file")]
  tab[is.na(log.file) & !is.na(job.hash), log.file := sprintf("%s.log", job.hash)]
  tab[!is.na(log.file), log.file := fs::path(dir(reg, "logs"), log.file)]
  tab$log.file
}

getJobFiles = function(reg, hash) {
  fs::path(reg$file.dir, "jobs", sprintf("%s.rds", hash))
}

getExternalDirs = function(reg, ids) {
  job.ids = if (is.atomic(ids)) ids else ids$job.id
  fs::path(dir(reg, "external"), job.ids)
}

# base32-encode a name and append ".rds"
mangle = function(x) {
  encoded = base32_encode(x, use.padding = FALSE)
  sprintf("%s.rds", encoded)
}

# inverse of mangle(): drop the last four characters (".rds") and decode
unmangle = function(x) {
  encoded = stri_sub(x, to = -5L)
  base32_decode(encoded, use.padding = FALSE)
}

# Deletes the existing files in 'x' and keeps retrying every half second
# until none of them exists anymore.
file_remove = function(x) {
  fs::file_delete(x[fs::file_exists(x)])
  repeat {
    left = fs::file_exists(x)
    if (!any(left))
      break
    Sys.sleep(0.5)
    fs::file_delete(x[left])
  }
}

file_mtime = function(x) {
  info = fs::file_info(x)
  info$modification_time
}

# Removes a previous version of the file, serializes 'object' and waits (via
# waitForFile) for the file afterwards.
writeRDS = function(object, file, compress = "gzip") {
  file_remove(file)
  saveRDS(object, file = file, version = 2L, compress = compress)
  waitForFile(file, 300)
  invisible(TRUE)
}


================================================
FILE: R/findJobs.R
================================================
#' @title Find and Filter Jobs
#'
#' @description
#' These functions are used to find and filter jobs, depending on either their parameters (\code{findJobs} and
#' \code{findExperiments}), their tags (\code{findTagged}), or their computational status (all other functions,
#' see \code{\link{getStatus}} for an overview).
#'
#' Note that \code{findQueued}, \code{findRunning}, \code{findOnSystem} and \code{findExpired} are somewhat heuristic
#' and may report misleading results, depending on the state of the system and the \code{\link{ClusterFunctions}} implementation.
#'
#' See \code{\link{JoinTables}} for convenient set operations (unions, intersects, differences) on tables with job ids.
#'
#' @param expr [\code{expression}]\cr
#'   Predicate expression evaluated in the job parameters.
#'   Jobs for which \code{expr} evaluates to \code{TRUE} are returned.
#' @templateVar ids.default all
#' @template ids
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] with column \dQuote{job.id} containing matched jobs.
#' @seealso \code{\link{getStatus}} \code{\link{JoinTables}}
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(identity, i = 1:3, reg = tmp)
#' ids = findNotSubmitted(reg = tmp)
#'
#' # get all jobs:
#' findJobs(reg = tmp)
#'
#' # filter for jobs with parameter i >= 2
#' findJobs(i >= 2, reg = tmp)
#'
#' # filter on the computational status
#' findSubmitted(reg = tmp)
#' findNotDone(reg = tmp)
#'
#' # filter on tags
#' addJobTags(2:3, "my_tag", reg = tmp)
#' findTagged(tags = "my_tag", reg = tmp)
#'
#' # combine filter functions using joins
#' # -> jobs which are not done and not tagged (using an anti-join):
#' ajoin(findNotDone(reg = tmp), findTagged("my_tag", reg = tmp))
findJobs = function(expr, ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  ids = convertIds(reg, ids)
  if (missing(expr))
    return(ids %??% allIds(reg))

  # the unevaluated predicate is evaluated once per job in its parameters,
  # falling back to the caller's frame for all other symbols
  expr = substitute(expr)
  ee = parent.frame()
  fun = function(pars) eval(expr, pars, enclos = ee)

  job.pars = NULL
  tab = mergedJobs(reg, ids, c("job.id", "job.pars"))
  matched = tab[vlapply(job.pars, fun), "job.id"]
  setkeyv(matched, "job.id")
}

#' @export
#' @rdname findJobs
#' @param prob.name [\code{character}]\cr
#'   Exact name of the problem (no substring matching).
#'   If not provided, all problems are matched.
#' @param prob.pattern [\code{character}]\cr
#'   Regular expression pattern to match problem names.
#'   If not provided, all problems are matched.
#' @param algo.name [\code{character}]\cr
#'   Exact name of the algorithm (no substring matching).
#'   If not provided, all algorithms are matched.
#' @param algo.pattern [\code{character}]\cr
#'   Regular expression pattern to match algorithm names.
#'   If not provided, all algorithms are matched.
#' @param prob.pars [\code{expression}]\cr
#'   Predicate expression evaluated in the problem parameters.
#' @param algo.pars [\code{expression}]\cr
#'   Predicate expression evaluated in the algorithm parameters.
#' @param repls [\code{integer}]\cr
#'   Whitelist of replication numbers. If not provided, all replications are matched.
findExperiments = function(ids = NULL, prob.name = NA_character_, prob.pattern = NA_character_, algo.name = NA_character_, algo.pattern = NA_character_, prob.pars, algo.pars, repls = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry", sync = TRUE)
  assertString(prob.name, na.ok = TRUE, min.chars = 1L)
  assertString(prob.pattern, na.ok = TRUE, min.chars = 1L)
  assertString(algo.name, na.ok = TRUE, min.chars = 1L)
  assertString(algo.pattern, na.ok = TRUE, min.chars = 1L)

  ee = parent.frame()
  # builds the per-job predicate for an unevaluated parameter expression
  predicate = function(expr) function(pars) eval(expr, pars, enclos = ee)
  # dummy bindings for the column names used in the data.table calls below
  problem = algorithm = repl = NULL

  tab = mergedJobs(reg, convertIds(reg, ids), c("job.id", "problem", "algorithm", "prob.pars", "algo.pars", "repl"))

  # each supplied filter narrows the table further
  if (!is.na(prob.name))
    tab = tab[problem == prob.name]

  if (!is.na(prob.pattern))
    tab = tab[stri_detect_regex(problem, prob.pattern)]

  if (!is.na(algo.name))
    tab = tab[algorithm == algo.name]

  if (!is.na(algo.pattern))
    tab = tab[stri_detect_regex(algorithm, algo.pattern)]

  if (!is.null(repls)) {
    repls = asInteger(repls, any.missing = FALSE)
    tab = tab[repl %in% repls]
  }

  if (!missing(prob.pars)) {
    # capture the expression first; the argument is overwritten afterwards so
    # that the name refers to the table column in the subset below
    fun = predicate(substitute(prob.pars))
    prob.pars = NULL
    tab = tab[vlapply(prob.pars, fun)]
  }

  if (!missing(algo.pars)) {
    fun = predicate(substitute(algo.pars))
    algo.pars = NULL
    tab = tab[vlapply(algo.pars, fun)]
  }

  setkeyv(tab[, "job.id"], "job.id")[]
}


#' @export
#' @rdname findJobs
findSubmitted = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  .findSubmitted(reg, convertIds(reg, ids))
}

# jobs with a submission time stamp
.findSubmitted = function(reg, ids = NULL) {
  submitted = NULL
  tab = filter(reg$status, ids, c("job.id", "submitted"))
  tab[!is.na(submitted), "job.id"]
}


#' @export
#' @rdname findJobs
findNotSubmitted = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  .findNotSubmitted(reg, convertIds(reg, ids))
}

# jobs without a submission time stamp
.findNotSubmitted = function(reg, ids = NULL) {
  submitted = NULL
  tab = filter(reg$status, ids, c("job.id", "submitted"))
  tab[is.na(submitted), "job.id"]
}


#' @export
#' @rdname findJobs
findStarted = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  .findStarted(reg, convertIds(reg, ids))
}

# jobs with a start time stamp or whose batch id is reported as running
.findStarted = function(reg, ids = NULL, batch.ids = getBatchIds(reg, status = "running")) {
  started = batch.id = status = NULL
  bids = batch.ids[status == "running"]$batch.id
  tab = filter(reg$status, ids, c("job.id", "started", "batch.id"))
  tab[!is.na(started) | batch.id %in% bids, "job.id"]
}


#' @export
#' @rdname findJobs
findNotStarted = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  .findNotStarted(reg, convertIds(reg, ids))
}

.findNotStarted = function(reg, ids = NULL, batch.ids = getBatchIds(reg, status = "running")) {
  started = batch.id = status = NULL
  bids = batch.ids[status == "running"]$batch.id
  filter(reg$status, ids, c("job.id", "started", "batch.id"))[is.na(started) & !
batch.id %chin% bids, "job.id"] } #' @export #' @rdname findJobs findDone = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findDone(reg, convertIds(reg, ids)) } .findDone = function(reg, ids = NULL) { done = error = NULL filter(reg$status, ids, c("job.id", "done", "error"))[!is.na(done) & is.na(error), "job.id"] } #' @export #' @rdname findJobs findNotDone = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findNotDone(reg, convertIds(reg, ids)) } .findNotDone = function(reg, ids = NULL) { done = error = NULL filter(reg$status, ids, c("job.id", "done", "error"))[is.na(done) | !is.na(error), "job.id"] } #' @export #' @rdname findJobs findErrors = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findErrors(reg, convertIds(reg, ids)) } .findErrors = function(reg, ids = NULL) { error = NULL filter(reg$status, ids, c("job.id", "error"))[!is.na(error), "job.id"] } # used in waitForJobs: find jobs which are done or error .findTerminated = function(reg, ids = NULL) { done = NULL filter(reg$status, ids, c("job.id", "done"))[!is.na(done), "job.id"] } #' @export #' @rdname findJobs findOnSystem = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findOnSystem(reg, convertIds(reg, ids)) } .findOnSystem = function(reg, ids = NULL, cols = "job.id", batch.ids = getBatchIds(reg, status = "all")) { if (length(batch.ids) == 0L) return(noIds()) submitted = done = batch.id = NULL filter(reg$status, ids, c("job.id", "submitted", "done", "batch.id"))[!is.na(submitted) & is.na(done) & batch.id %in% batch.ids$batch.id, cols, with = FALSE] } #' @export #' @rdname findJobs findRunning = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findOnSystem(reg, convertIds(reg, ids), batch.ids = getBatchIds(reg, status = "running")) } #' @export #' @rdname findJobs findQueued = function(ids = NULL, reg = 
getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findOnSystem(reg, convertIds(reg, ids), batch.ids = getBatchIds(reg, status = "queued")) } #' @export #' @rdname findJobs findExpired = function(ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg, sync = TRUE) .findExpired(reg, convertIds(reg, ids)) } .findExpired = function(reg, ids = NULL, batch.ids = getBatchIds(reg)) { submitted = done = batch.id = NULL filter(reg$status, ids, c("job.id", "submitted", "done", "batch.id"))[!is.na(submitted) & is.na(done) & batch.id %chnin% batch.ids$batch.id, "job.id"] } #' @export #' @rdname findJobs #' @param tags [\code{character}]\cr #' Return jobs which are tagged with any of the tags provided. findTagged = function(tags = character(0L), ids = NULL, reg = getDefaultRegistry()) { assertRegistry(reg) ids = convertIds(reg, ids, default = allIds(reg)) assertCharacter(tags, any.missing = FALSE, pattern = "^[[:alnum:]_.]+$", min.len = 1L) tag = NULL ids[unique(reg$tags[tag %chin% tags, "job.id"], by = "job.id")] } ================================================ FILE: R/getDefaultRegistry.R ================================================ #' @title Get and Set the Default Registry #' @description #' \code{getDefaultRegistry} returns the registry currently set as default (or #' stops with an exception if none is set). \code{setDefaultRegistry} sets #' a registry as default. 
#'
#' @template reg
#' @family Registry
#' @export
getDefaultRegistry = function() {
  # look the default up once; fail loudly if nothing has been registered
  current = batchtools$default.registry
  if (is.null(current)) {
    stop("No default registry defined")
  }
  current
}

#' @export
#' @rdname getDefaultRegistry
setDefaultRegistry = function(reg) {
  # NULL is accepted as-is (and unsets the default); anything else must be a registry
  if (!is.null(reg)) {
    assertRegistry(reg)
  }
  batchtools$default.registry = reg
}

================================================
FILE: R/getErrorMessages.R
================================================
#' @title Retrieve Error Messages
#'
#' @description
#' Extracts error messages from the internal data base and returns them in a table.
#'
#' @templateVar ids.default findErrors
#' @template ids
#' @param missing.as.error [\code{logical(1)}]\cr
#'   Treat missing results as errors? If \code{TRUE}, the error message \dQuote{[not terminated]} is imputed
#'   for jobs which have not terminated. Default is \code{FALSE}
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id}, \dQuote{terminated} (logical),
#'   \dQuote{error} (logical) and \dQuote{message} (string).
#' @family debug
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' fun = function(i) if (i == 3) stop(i) else i
#' ids = batchMap(fun, i = 1:5, reg = tmp)
#' submitJobs(1:4, reg = tmp)
#' waitForJobs(1:4, reg = tmp)
#' getErrorMessages(ids, reg = tmp)
#' getErrorMessages(ids, missing.as.error = TRUE, reg = tmp)
getErrorMessages = function(ids = NULL, missing.as.error = FALSE, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  assertFlag(missing.as.error)
  ids = convertIds(reg, ids, default = .findErrors(reg = reg))

  # one row per requested job: termination flag, error flag and the raw message
  job.id = done = error = NULL
  msgs = reg$status[ids, list(job.id, terminated = !is.na(done), error = !is.na(error), message = error)]

  # optionally flag jobs which have not terminated as errors with a placeholder message
  if (missing.as.error) {
    msgs[!msgs$terminated, c("error", "message") := list(TRUE, "[not terminated]")]
  }

  msgs[]
}

================================================
FILE: R/getStatus.R
================================================
#' @title Summarize the Computational Status
#'
#' @description
#' This function gives an encompassing overview over the computational status on your system.
#' The status can be one or many of the following:
#' \itemize{
#'  \item \dQuote{defined}: Jobs which are defined via \code{\link{batchMap}} or \code{\link{addExperiments}}, but are not yet submitted.
#'  \item \dQuote{submitted}: Jobs which are submitted to the batch system via \code{\link{submitJobs}}, scheduled for execution.
#'  \item \dQuote{started}: Jobs which have been started.
#'  \item \dQuote{done}: Jobs which terminated successfully.
#'  \item \dQuote{error}: Jobs which terminated with an exception.
#'  \item \dQuote{running}: Jobs which are listed by the cluster functions to be running on the live system. Not supported for all cluster functions.
#'  \item \dQuote{queued}: Jobs which are listed by the cluster functions to be queued on the live system. Not supported for all cluster functions.
#'  \item \dQuote{system}: Jobs which are listed by the cluster functions to be queued or running. Not supported for all cluster functions.
#'  \item \dQuote{expired}: Jobs which have been submitted, but vanished from the live system. Note that this is determined heuristically and may include some false positives.
#' }
#' Here, a job which terminated successfully counts towards the jobs which are submitted, started and done.
#' To retrieve the corresponding job ids, see \code{\link{findJobs}}.
#'
#' @templateVar ids.default all
#' @template ids
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] (with class \dQuote{Status} for printing).
#' @seealso \code{\link{findJobs}}
#' @export
#' @family debug
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' fun = function(i) if (i == 3) stop(i) else i
#' ids = batchMap(fun, i = 1:5, reg = tmp)
#' submitJobs(ids = 1:4, reg = tmp)
#' waitForJobs(reg = tmp)
#'
#' tab = getStatus(reg = tmp)
#' print(tab)
#' str(tab)
getStatus = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  stats = getStatusTable(convertIds(reg, ids), reg = reg)
  setClasses(stats, c("Status", class(stats)))
}

# Computes a single-row table of status counts for the jobs in 'ids'
# by merging the status table with the live listing in 'batch.ids'.
getStatusTable = function(ids = NULL, batch.ids = getBatchIds(reg = reg), reg = getDefaultRegistry()) {
  submitted = started = done = error = status = NULL
  stats = merge(filter(reg$status, ids), batch.ids, by = "batch.id", all.x = TRUE, all.y = FALSE, sort = FALSE)[, list(
    defined   = .N,
    submitted = count(submitted),
    started   = sum(!is.na(started) | !is.na(status) & status == "running"),
    done      = count(done),
    error     = count(error),
    queued    = sum(status == "queued", na.rm = TRUE),
    running   = sum(status == "running", na.rm = TRUE),
    expired   = sum(!is.na(submitted) & is.na(done) & is.na(status))
  )]
  # 'done' is counted for all terminated jobs above; report successful jobs only
  stats$done = stats$done - stats$error
  stats$system = stats$queued + stats$running
  return(stats)
}

#' @export
print.Status = function(x, ...) {
  # width of the count column adapts to the number of defined jobs
  fmt = sprintf(" %%-13s: %%%ii (%%5.1f%%%%)", stri_width(x$defined))
  pr = function(label, h) catf(fmt, label, h, h / x$defined * 100)

  catf("Status for %i jobs at %s:", x$defined, strftime(Sys.time()))
  pr("Submitted", x$submitted)
  pr("-- Queued", x$queued)
  pr("-- Started", x$started)
  pr("---- Running", x$running)
  pr("---- Done", x$done)
  pr("---- Error", x$error)
  pr("---- Expired", x$expired)
  # print methods conventionally return their argument invisibly
  invisible(x)
}

================================================
FILE: R/helpers.R
================================================
# Joins job definitions with the status table (optionally restricted to 'ids')
# and returns the columns 'cols' (all columns if 'cols' is missing).
mergedJobs = function(reg, ids, cols) {
  if (is.null(ids))
    reg$defs[reg$status, cols, on = "def.id", nomatch = 0L, with = missing(cols)]
  else
    reg$defs[reg$status[ids, nomatch = 0L, on = "job.id"], cols, on = "def.id", nomatch = 0L, with = missing(cols)]
}

# Returns the next 'n' ids following the largest id in 'ids' (starting at 1 if 'ids' is empty).
auto_increment = function(ids, n = 1L) {
  if (length(ids) == 0L) seq_len(n) else max(ids) + seq_len(n)
}

# Current time as numeric time stamp, rounded to 4 digits.
ustamp = function() {
  round(as.numeric(Sys.time()), 4L)
}

# Like names(), but always returns a character vector of length(x);
# missing or empty names are replaced with 'missing.val'.
names2 = function(x, missing.val = NA_character_) {
  n = names(x)
  if (is.null(n))
    return(rep.int(missing.val, length(x)))
  replace(n, is.na(n) | !nzchar(n), missing.val)
}

# Inserts the named elements of 'y' into 'x' and returns 'x' ordered by name.
insert = function(x, y) {
  x[names2(y)] = y
  x[order(names2(x))]
}

# Creates a progress bar, or a no-op stand-in with the same 'tick'/'update'
# interface if progress output is disabled or the terminal is too narrow.
makeProgressBar = function(...) {
  if (!batchtools$debug && getOption("batchtools.verbose", TRUE) && getOption("batchtools.progress", TRUE) && getOption("width") >= 20L) {
    progress_bar$new(...)
  } else {
    list(tick = function(len = 1, tokens = list()) NULL, update = function(ratio, tokens) NULL)
  }
}

seq_row = function(x) {
  seq_len(nrow(x))
}

# Type-safe wrappers around vapply() for logical, integer, numeric and character results.
vlapply = function(x, fun, ..., use.names = TRUE) {
  vapply(X = x, FUN = fun, ..., FUN.VALUE = NA, USE.NAMES = use.names)
}

viapply = function(x, fun, ..., use.names = TRUE) {
  vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_integer_, USE.NAMES = use.names)
}

vnapply = function(x, fun, ..., use.names = TRUE) {
  vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_real_, USE.NAMES = use.names)
}

vcapply = function(x, fun, ..., use.names = TRUE) {
  vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_character_, USE.NAMES = use.names)
}

is.error = function(x) {
  inherits(x, "try-error")
}

# formatting info message
info = function(...) {
  if (getOption("batchtools.verbose", TRUE))
    message(sprintf(...))
}

# formatting cat()
catf = function(..., con = "") {
  cat(stri_flatten(sprintf(...), "\n"), "\n", sep = "", file = con)
}

# formatting message()
messagef = function(..., con = "") {
  message(sprintf(...))
}

# formatting warning()
warningf = function(...) {
  warning(simpleWarning(sprintf(...), call = sys.call(sys.parent())))
}

# formatting stop()
stopf = function(...) {
  stop(simpleError(sprintf(...), call = NULL))
}

`%nin%` = function(x, y) {
  !match(x, y, nomatch = 0L)
}

`%chnin%` = function(x, y) {
  !chmatch(x, y, nomatch = 0L)
}

# Sets the class attribute via setattr() and returns 'x'.
setClasses = function(x, cl) {
  setattr(x, "class", cl)
  x
}

#' @useDynLib batchtools count_not_missing
count = function(x) {
  .Call(count_not_missing, x)
}

filterNull = function(x) {
  x[!vlapply(x, is.null)]
}

# Truncates 'str' to at most 'length' characters; a non-empty 'append'
# replaces the tail of the truncated string.
stri_trunc = function(str, length, append = "") {
  if (is.na(str))
    return(str)
  if (stri_length(str) > length) {
    if (is.na(append) || !nzchar(append))
      return(stri_sub(str, 1L, length))
    return(stri_join(stri_sub(str, 1L, length - stri_length(append)), append))
  }
  return(str)
}

# Path to the Rscript binary of the running R installation.
Rscript = function() {
  fs::path(R.home("bin"), ifelse(testOS("windows"), "Rscript.exe", "Rscript"))
}

# Derives a job seed from 'start.seed' and 'id', wrapping around instead of
# exceeding .Machine$integer.max.
getSeed = function(start.seed, id) {
  if (id > .Machine$integer.max - start.seed)
    start.seed - .Machine$integer.max + id
  else
    start.seed + id
}

chsetdiff = function(x, y) {
  # Note: assumes that x has no duplicates
  x[chmatch(x, y, 0L) == 0L]
}

chintersect = function(x, y) {
  # Note: assumes that x has no duplicates
  x[chmatch(y, x, 0L)]
}

rnd_hash = function(prefix = "") {
  stri_join(prefix, digest(list(runif(1L), as.numeric(Sys.time()))))
}

# Time stamp suffix for messages; empty unless option 'batchtools.timestamps' is TRUE.
now = function() {
  if (isTRUE(getOption("batchtools.timestamps", FALSE)))
    sprintf(" %s", strftime(Sys.time()))
  else
    ""
}

# Only active if the environment variable IN_PKGDOWN is "true":
# resets the example registry directories and pushes them via fs::file_temp_push().
example_push_temp = function(i = 1L) {
  if (identical(Sys.getenv("IN_PKGDOWN"), "true")) {
    base = fs::path(dirname(tempdir()), "batchtools-example")
    dirs = if (i == 1L) fs::path(base, "reg") else fs::path(base, sprintf("reg%i", seq_len(i)))
    fs::dir_delete(dirs[fs::dir_exists(dirs)])
    fs::file_temp_push(dirs)
  }
}

================================================
FILE: R/ids.R
================================================
allIds = function(reg) {
  reg$status[, "job.id"]
}

noIds = function() {
  data.table(job.id = integer(0L), key = "job.id")
}

# Casts 'ids' (data.table, data.frame or integerish vector) to a data.table
# with integer column 'job.id', keyed on 'job.id' if 'setkey' is TRUE.
castIds = function(ids, setkey = TRUE) {
  if (is.data.table(ids)) {
    qassert(ids$job.id, "X", .var.name = "column 'job.id'")

    if (!is.integer(ids$job.id)) {
      "!DEBUG [castIds]: Casting ids in data.table to integer"
      # copy first so that the caller's table is not modified
      ids = copy(ids)
      ids$job.id = as.integer(ids$job.id)
    }

    if (setkey && !identical(key(ids), "job.id")) {
      "!DEBUG [castIds]: Setting missing key for ids table"
      # setkeyv() works by reference, so copy first
      ids = copy(ids)
      setkeyv(ids, "job.id")
    }

    return(ids)
  }

  if (is.data.frame(ids)) {
    "!DEBUG [castIds]: Casting ids from data.frame to data.table"
    ids$job.id = asInteger(ids$job.id, .var.name = "column 'job.id'")
    ids = as.data.table(ids)
    if (setkey)
      setkeyv(ids, "job.id")
    return(ids)
  }

  if (qtest(ids, "X")) {
    "!DEBUG [castIds]: Casting ids from vector to data.table"
    return(data.table(job.id = as.integer(ids), key = if (setkey) "job.id" else NULL))
  }

  stop("Format of 'ids' not recognized. Must be a data.frame with column 'job.id' or an integerish vector")
}

# Normalizes user-provided 'ids': returns 'default' for NULL, otherwise casts,
# rejects duplicates, drops ids unknown to the registry (with a message) and
# keeps only 'job.id' plus the requested extra columns.
convertIds = function(reg, ids, default = NULL, keep.extra = character(0L), keep.order = FALSE) {
  if (is.null(ids))
    return(default)

  ids = castIds(ids, setkey = !keep.order)
  if (anyDuplicated(ids, by = "job.id"))
    stop("Duplicated ids provided")

  # row indices of ids without a match in the status table (anti-join)
  invalid = ids[!reg$status, on = "job.id", which = TRUE]
  if (length(invalid) > 0L) {
    # pluralize on the number of invalid ids ('ids' is a table, its length is the column count)
    info("Ignoring %i invalid job id%s", length(invalid), if (length(invalid) > 1L) "s" else "")
    ids = ids[-invalid]
  }

  cols = if (length(keep.extra)) union("job.id", chintersect(keep.extra, names(ids))) else "job.id"
  ids[, cols, with = FALSE]
}

# Like convertIds(), but requires exactly one valid id.
convertId = function(reg, id) {
  id = convertIds(reg, id)
  if (nrow(id) != 1L)
    stopf("You must provide exactly one valid id (%i provided)", nrow(id))
  return(id)
}

# Restricts table 'x' to the rows matching 'y' (joined on the key of 'x') and
# to the columns 'cols'; a NULL 'y' keeps all rows, a missing 'cols' all columns.
filter = function(x, y, cols) {
  if (is.null(y)) {
    if (missing(cols))
      return(x)
    return(x[, cols, with = FALSE])
  }
  return(x[y, cols, on = key(x), nomatch = 0L, with = missing(cols)])
}

================================================
FILE: R/killJobs.R
================================================
#' @title Kill Jobs
#'
#' @description
#' Kill jobs which are currently running on the batch system.
#'
#' In case of an error when killing, the function tries - after a short sleep - to kill the remaining
#' batch jobs again. If this fails three times for some jobs, the function gives up. Jobs that could be
#' successfully killed are reset in the \link{Registry}.
#'
#' @templateVar ids.default findOnSystem
#' @template ids
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id}, the corresponding \dQuote{batch.id} and
#'   the logical flag \dQuote{killed} indicating success.
#' @family debug
#' @export
killJobs = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE, sync = TRUE)
  kill = reg$cluster.functions$killJob
  if (is.null(kill))
    stop("ClusterFunctions implementation does not support the killing of jobs")

  # restrict the candidates to jobs which are still on the system
  ids = convertIds(reg, ids, default = .findSubmitted(reg = reg))
  tab = reg$status[.findOnSystem(ids = ids, reg = reg), c("job.id", "started", "batch.id")]

  if (nrow(tab) == 0L)
    return(data.table(job.id = integer(0L), batch.id = character(0L), killed = logical(0L)))

  runHook(reg, "pre.kill", tab)

  info("Trying to kill %i jobs ...", nrow(tab))

  # kill queued jobs first, otherwise they might get started while killing running jobs
  setorderv(tab, "started", na.last = FALSE)

  tab[, "killed" := FALSE]
  batch.ids = unique(tab$batch.id)
  info("Killing %i real batch jobs ...", length(batch.ids))

  max.tries = 3L
  for (i in seq_len(max.tries)) {
    # one kill call per batch id; all jobs sharing the batch id get the same flag
    tab[!tab$killed, "killed" := !is.error(try(kill(reg, .BY$batch.id), silent = TRUE)), by = "batch.id"]
    if (all(tab$killed))
      break
    # only wait if another attempt follows
    if (i < max.tries)
      Sys.sleep(2)
  }

  if (!all(tab$killed))
    warningf("Could not kill %i jobs", sum(!tab$killed))

  # reset killed jobs
  merged = sync(reg = reg)
  cols = c("submitted", "started", "done", "error", "mem.used", "resource.id", "batch.id", "log.file", "job.hash")
  reg$status[tab[tab$killed], (cols) := list(NA_real_, NA_real_, NA_real_, NA_character_, NA_real_, NA_integer_, NA_character_, NA_character_, NA_character_)]
  saveRegistry(reg)
  file_remove(merged)

  tab = setkeyv(tab[, c("job.id", "batch.id", "killed")], "job.id")
  Sys.sleep(reg$cluster.functions$scheduler.latency)
  runHook(reg, "post.kill", tab)
  return(tab)
}

================================================
FILE: R/loadRegistry.R
================================================
#' @title Load a Registry from the File System
#'
#' @description
#' Loads a registry from its \code{file.dir}.
#'
#' Multiple R sessions accessing the same registry simultaneously can lead to database inconsistencies.
#' This is especially dangerous if the same \code{file.dir} is accessed from multiple machines, e.g. via a mount.
#'
#' If you just need to check on the status or peek into some preliminary results while another process is still submitting or waiting
#' for pending results, you can load the registry in a read-only mode.
#' All operations that need to change the registry will raise an exception in this mode.
#' Files communicated back by the computational nodes are parsed to update the registry in memory while the registry on the file system remains unchanged.
#'
#' A heuristic tries to detect if the registry has been altered in the background by another process and in this case automatically restricts the current registry to read-only mode.
#' However, you should not rely on this heuristic to work flawlessly.
#' Thus, set \code{writeable} to \code{TRUE} if and only if you are absolutely sure that other state-changing processes are terminated.
#'
#' If you need write access, load the registry with \code{writeable} set to \code{TRUE}.
#'
#' @param writeable [\code{logical(1)}]\cr
#'   Loads the registry in read-write mode. Default is \code{FALSE}.
#' @inheritParams makeRegistry
#' @family Registry
#' @return [\code{\link{Registry}}].
#' @export
loadRegistry = function(file.dir, work.dir = NULL, conf.file = findConfFile(), make.default = TRUE, writeable = FALSE) {
  assertString(file.dir)
  assertDirectory(file.dir)
  assertString(work.dir, null.ok = TRUE)
  assertString(conf.file, na.ok = TRUE)
  assertFlag(make.default)
  assertFlag(writeable)

  # read registry
  if (writeable)
    info("Reading registry in read-write mode")
  else
    # separate the sentences with a blank; stri_paste() defaults to sep = ""
    info(stri_paste(
      "Reading registry in read-only mode.",
      "You can inspect results and errors, but cannot add, remove, submit or alter jobs in any way.",
      "If you need write-access, re-load the registry with `loadRegistry([...], writeable = TRUE)`.",
      sep = " "
    ))

  file.dir = fs::path_abs(file.dir)
  reg = readRegistry(file.dir)

  # re-allocate stuff which has not been serialized
  reg$file.dir = file.dir
  reg$writeable = writeable
  reg$mtime = file_mtime(fs::path(reg$file.dir, "registry.rds"))
  alloc.col(reg$status, ncol(reg$status))
  alloc.col(reg$defs, ncol(reg$defs))
  alloc.col(reg$resources, ncol(reg$resources))
  alloc.col(reg$tags, ncol(reg$tags))
  if (!is.null(work.dir))
    reg$work.dir = fs::path_abs(work.dir)
  updated = updateRegistry(reg = reg)

  # try to load dependencies relative to work.dir
  if (fs::dir_exists(reg$work.dir)) {
    with_dir(reg$work.dir, loadRegistryDependencies(reg))
  } else {
    warningf("The work.dir '%s' does not exist, jobs might fail to run on this system.", reg$work.dir)
    loadRegistryDependencies(reg)
  }

  # source system config
  setSystemConf(reg, conf.file)
  if (make.default)
    batchtools$default.registry = reg

  # persist the registry only if sync() merged files or the registry was updated
  merged = sync(reg = reg)
  if (length(merged) || updated) {
    saveRegistry(reg)
    file_remove(merged)
  }
  return(reg)
}

# Reads the serialized registry from 'file.dir'. Prefers "registry.new.rds"
# (moved over "registry.rds" on success) and falls back to "registry.rds".
readRegistry = function(file.dir) {
  fn.old = fs::path(file.dir, "registry.rds")
  fn.new = fs::path(file.dir, "registry.new.rds")

  if (fs::file_exists(fn.new)) {
    reg = try(readRDS(fn.new), silent = TRUE)
    if (!is.error(reg)) {
      fs::file_move(fn.new, fn.old)
      return(reg)
    } else {
      warning("Latest version of registry seems to be corrupted, trying backup ...")
    }
  }

  if (fs::file_exists(fn.old)) {
    reg = try(readRDS(fn.old), silent = TRUE)
    if (!is.error(reg))
      return(reg)
    stop("Could not load the registry, files seem to be corrupt")
  }

  stopf("No registry found in '%s'", file.dir)
}

================================================
FILE: R/loadResult.R
================================================
#' @title Load the Result of a Single Job
#'
#' @description
#' Loads the result of a single job.
#'
#' @template id
#' @template reg
#' @return [\code{ANY}]. The stored result.
#' @family Results
#' @export
loadResult = function(id, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  id = convertId(reg, id)
  # only jobs which terminated without an error have a loadable result
  if (nrow(.findDone(reg, id)) == 0L)
    stopf("Job with id %i not terminated", id$job.id)
  fn = getResultFiles(reg, id)
  return(readRDS(fn))
}

================================================
FILE: R/mergeRegistries.R
================================================
# @title Merge the computational status of two registries
#
# @description
# Merges the computational status of jobs found in the registry \code{source}
# into the registry \code{target}.
# Both registries must have the same jobs defined and may only differ w.r.t.
# the computational status of the jobs.
# This function is intended to be applied in the following context:
# \enumerate{
#   \item Define all jobs locally (and ensure they work as intended by testing them).
#   \item Copy the \code{file.dir} to remote systems.
#   \item Submit a subset of jobs on each system,
#   \item After all jobs are terminated, copy both registries back to the local file system. Remember to keep backups.
#   \item Load both registries with \code{\link{loadRegistry}}, merge the second into the first with this function.
# }
#
# @param source [\code{\link{Registry}}]\cr
#   Registry to merge the computational status from.
# @param target [\code{\link{Registry}}]\cr
#   Registry to merge into. Defaults to the default registry.
# @return [\code{\link{Registry}}].
# @export
# @examples
# \dontshow{ batchtools:::example_push_temp(2) }
# target = makeRegistry(NA, make.default = FALSE)
# batchMap(identity, 1:10, reg = target)
# td = tempdir()
# file.copy(target$file.dir, td, recursive = TRUE)
# file.dir = file.path(td, basename(target$file.dir))
# source = loadRegistry(file.dir, update.paths = TRUE)
#
# submitJobs(1:5, reg = target)
# submitJobs(6:10, reg = source)
#
# new = mergeRegistries(source, target)
mergeRegistries = function(source, target = getDefaultRegistry()) {
  assertRegistry(source, writeable = TRUE, sync = TRUE, running.ok = FALSE)
  assertRegistry(target, writeable = TRUE, sync = TRUE, running.ok = FALSE)
  if (fs::path_real(source$file.dir) == fs::path_real(target$file.dir))
    stop("You must provide two different registries (using different file directories)")
  # row-wise digest over all columns except 'def.id'
  hash = function(x) unlist(.mapply(function(...) digest(list(...)), x[, !"def.id"], list()))

  # update only jobs which are not already computed and only those which are terminated
  status = source$status[.findNotDone(target), ][.findSubmitted(source)]

  # create a hash of parameters to match on
  status$hash = hash(sjoin(source$defs, status))

  # create temp table for target with the same hashes
  tmp = data.table(def.id = status$def.id, hash = hash(sjoin(target$defs, status)))

  # filter status to keep only jobs with matching ids and hashes
  # in status there are now only jobs which have an exact match in target$status
  # perform an updating join
  status = status[tmp, nomatch = 0L, on = c("def.id", "hash")]

  info("Merging %i jobs ...", nrow(status))

  src = getResultFiles(source, status)
  dst = fs::path(dir(target, "results"), fs::path_file(src))
  info("Copying %i result files ...", length(src))
  fs::file_copy(src, dst, overwrite = TRUE)

  src = getLogFiles(source, status)
  dst = fs::path(dir(target, "logs"), fs::path_file(src))
  info("Copying %i log files ...", length(src))
  fs::file_copy(src, dst, overwrite = TRUE)

  # external directories are named after the job id; copy those of the merged jobs
  ext.dirs = as.integer(chintersect(list.files(dir(source, "external")), as.character(status$job.id)))
  if (length(ext.dirs) > 0L) {
    src = getExternalDirs(source, ext.dirs)
    dst = getExternalDirs(target, ext.dirs)
    info("Copying %i external directories ...", length(ext.dirs))
    fs::dir_delete(dst[fs::dir_exists(dst)])
    fs::dir_copy(src, fs::path_dir(dst))
  }

  target$status = ujoin(target$status, status, by = "job.id")
  saveRegistry(reg = target)
}

================================================
FILE: R/reduceResults.R
================================================
#' @title Reduce Results
#'
#' @description
#' A version of \code{\link[base]{Reduce}} for \code{\link{Registry}} objects
#' which iterates over finished jobs and aggregates them.
#' All jobs must have terminated, an error is raised otherwise.
#'
#' @note
#' If you have thousands of jobs, disabling the progress bar (\code{options(batchtools.progress = FALSE)})
#' can significantly increase the performance.
#'
#' @templateVar ids.default findDone
#' @template ids
#' @param fun [\code{function}]\cr
#'   A function to reduce the results. The result of previous iterations (or
#'   the \code{init}) will be passed as first argument, the result of the
#'   i-th iteration as second. See \code{\link[base]{Reduce}} for some
#'   examples.
#'   If the function has the formal argument \dQuote{job}, the \code{\link{Job}}/\code{\link{Experiment}}
#'   is also passed to the function (named).
#' @param init [\code{ANY}]\cr
#'   Initial element, as used in \code{\link[base]{Reduce}}.
#'   If missing, the reduction uses the result of the first job as \code{init} and the reduction starts
#'   with the second job.
#' @param ... [\code{ANY}]\cr
#'   Additional arguments passed to function \code{fun}.
#' @return Aggregated results in the same order as provided ids.
#'   Return type depends on the user function. If \code{ids}
#'   is empty, \code{reduceResults} returns \code{init} (if available) or \code{NULL} otherwise.
#' @template reg
#' @family Results
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(function(a, b) list(sum = a+b, prod = a*b), a = 1:3, b = 1:3, reg = tmp)
#' submitJobs(reg = tmp)
#' waitForJobs(reg = tmp)
#'
#' # Extract element sum from each result
#' reduceResults(function(aggr, res) c(aggr, res$sum), init = list(), reg = tmp)
#'
#' # Aggregate element sum via '+'
#' reduceResults(function(aggr, res) aggr + res$sum, init = 0, reg = tmp)
#'
#' # Aggregate element prod via '*' where parameter b < 3
#' reduce = function(aggr, res, job) {
#'   if (job$pars$b >= 3)
#'     return(aggr)
#'   aggr * res$prod
#' }
#' reduceResults(reduce, init = 1, reg = tmp)
#'
#' # Reduce to data.frame() (inefficient, use reduceResultsDataTable() instead)
#' reduceResults(rbind, init = data.frame(), reg = tmp)
#'
#' # Reduce to data.frame by collecting results first, then utilize vectorization of rbind:
#' res = reduceResultsList(fun = as.data.frame, reg = tmp)
#' do.call(rbind, res)
#'
#' # Reduce with custom combine function:
#' comb = function(x, y) list(sum = x$sum + y$sum, prod = x$prod * y$prod)
#' reduceResults(comb, reg = tmp)
#'
#' # The same with neutral element NULL
#' comb = function(x, y) if (is.null(x)) y else list(sum = x$sum + y$sum, prod = x$prod * y$prod)
#' reduceResults(comb, init = NULL, reg = tmp)
#'
#' # Alternative: Reduce in list, reduce manually in a 2nd step
#' res = reduceResultsList(reg = tmp)
#' Reduce(comb, res)
reduceResults = function(fun, ids = NULL, init, ..., reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  ids = convertIds(reg, ids, default = .findDone(reg = reg), keep.order = TRUE)
  fun = match.fun(fun)

  if (nrow(.findNotDone(reg, ids)))
    stop("All jobs must have been successfully computed")

  if (nrow(ids) == 0L)
    return(if (missing(init)) NULL else init)

  fns = getResultFiles(reg, ids)
  if (missing(init)) {
    # the first result serves as initial element; drop it from the files AND
    # from the ids so that fns[i] and ids[i] keep referring to the same job
    init = readRDS(fns[1L])
    fns = fns[-1L]
    ids = ids[-1L]
    if (length(fns) == 0L)
      return(init)
  }

  pb = makeProgressBar(total = length(fns), format = "Reduce [:bar] :percent eta: :eta")
  if ("job" %chin% names(formals(fun))) {
    # 'fun' wants the job object: construct it for each result
    for (i in seq_along(fns)) {
      init = fun(init, readRDS(fns[i]), job = makeJob(ids[i], reg = reg), ...)
      pb$tick()
    }
  } else {
    for (i in seq_along(fns)) {
      init = fun(init, readRDS(fns[i]), ...)
      pb$tick()
    }
  }
  return(init)
}

#' @title Apply Functions on Results
#'
#' @description
#' Applies a function on the results of your finished jobs and thereby collects
#' them in a \code{\link[base]{list}} or \code{\link[data.table]{data.table}}.
#' The latter requires the provided function to return a list (or \code{data.frame}) of scalar values.
#' See \code{\link[data.table]{rbindlist}} for features and limitations of the aggregation.
#'
#' If not all jobs are terminated, the respective result will be \code{NULL}.
#'
#' @note
#' If you have thousands of jobs, disabling the progress bar (\code{options(batchtools.progress = FALSE)})
#' can significantly increase the performance.
#'
#' @templateVar ids.default findDone
#' @template ids
#' @param fun [\code{function}]\cr
#'   Function to apply to each result. The result is passed unnamed as first argument. If \code{NULL}, the identity is used.
#'   If the function has the formal argument \dQuote{job}, the \code{\link{Job}}/\code{\link{Experiment}} is also passed to the function.
#' @param ... [\code{ANY}]\cr
#'   Additional arguments passed to function \code{fun}.
#' @template missing.val
#' @template reg
#' @return \code{reduceResultsList} returns a list of the results in the same order as the provided ids.
#'   \code{reduceResultsDataTable} returns a \code{\link[data.table]{data.table}} with columns \dQuote{job.id} and additional result columns
#'   created via \code{\link[data.table]{rbindlist}}, sorted by \dQuote{job.id}.
#' @seealso \code{\link{reduceResults}}
#' @family Results
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(2) }
#' ### Example 1 - reduceResultsList
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(function(x) x^2, x = 1:10, reg = tmp)
#' submitJobs(reg = tmp)
#' waitForJobs(reg = tmp)
#' reduceResultsList(fun = sqrt, reg = tmp)
#'
#' ### Example 2 - reduceResultsDataTable
#' tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
#'
#' # add first problem
#' fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd)
#' addProblem("rnorm", fun = fun, reg = tmp)
#'
#' # add second problem
#' fun = function(job, data, n, lambda, ...) rexp(n, rate = lambda)
#' addProblem("rexp", fun = fun, reg = tmp)
#'
#' # add first algorithm
#' fun = function(instance, method, ...) if (method == "mean") mean(instance) else median(instance)
#' addAlgorithm("average", fun = fun, reg = tmp)
#'
#' # add second algorithm
#' fun = function(instance, ...) sd(instance)
#' addAlgorithm("deviation", fun = fun, reg = tmp)
#'
#' # define problem and algorithm designs
#' library(data.table)
#' prob.designs = algo.designs = list()
#' prob.designs$rnorm = CJ(n = 100, mean = -1:1, sd = 1:5)
#' prob.designs$rexp = data.table(n = 100, lambda = 1:5)
#' algo.designs$average = data.table(method = c("mean", "median"))
#' algo.designs$deviation = data.table()
#'
#' # add experiments and submit
#' addExperiments(prob.designs, algo.designs, reg = tmp)
#' submitJobs(reg = tmp)
#'
#' # collect results and join them with problem and algorithm parameters
#' res = ijoin(
#'   getJobPars(reg = tmp),
#'   reduceResultsDataTable(reg = tmp, fun = function(x) list(res = x))
#' )
#' unwrap(res, sep = ".")
reduceResultsList = function(ids = NULL, fun = NULL, ..., missing.val, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  assertFunction(fun, null.ok = TRUE)
  # keep.order = TRUE: results are returned in the order of the provided ids
  ids = convertIds(reg, ids, default = .findDone(reg = reg), keep.order = TRUE)
  .reduceResultsList(ids, fun, ..., missing.val = missing.val, reg = reg)
}

#' @export
#' @rdname reduceResultsList
reduceResultsDataTable = function(ids = NULL, fun = NULL, ..., missing.val, reg = getDefaultRegistry()) {
  assertRegistry(reg, sync = TRUE)
  ids = convertIds(reg, ids, default = .findDone(reg = reg))
  assertFunction(fun, null.ok = TRUE)
  results = .reduceResultsList(ids = ids, fun = fun, ..., missing.val = missing.val, reg = reg)

  if (length(results) == 0L)
    return(noIds())
  # attach the collected results as list column "result" next to "job.id"
  ids[, "result" := results][]
}

# Internal worker shared by reduceResultsList() and reduceResultsDataTable().
#
# Reads the result file of each finished job in 'ids', applies 'fun' (identity
# if NULL) and returns a list with one element per row of 'ids'. Jobs without a
# result either trigger an error (no 'missing.val' given) or are filled with
# 'missing.val'.
.reduceResultsList = function(ids, fun = NULL, ..., missing.val, reg = getDefaultRegistry()) {
  if (is.null(fun)) {
    worker = function(.res, .job, ...) .res
  } else {
    fun = match.fun(fun)
    # only hand over the job if 'fun' declares a formal argument "job";
    # '.job' is a lazily evaluated argument, so no job is constructed otherwise
    if ("job" %chin% names(formals(fun)))
      worker = function(.res, .job, ...) fun(.res, job = .job, ...)
    else
      worker = function(.res, .job, ...) fun(.res, ...)
  }

  results = vector("list", nrow(ids))
  # row indices of 'ids' which belong to successfully terminated jobs
  done = ids[.findDone(reg, ids), nomatch = 0L, which = TRUE, on = "job.id"]

  if (missing(missing.val)) {
    if (length(done) != nrow(ids))
      stop("All jobs must be have been successfully computed")
  } else {
    results[setdiff(seq_row(ids), done)] = list(missing.val)
  }

  if (length(done) > 0L) {
    fns = getResultFiles(reg, ids)
    # the bar ticks once per finished job, so size it by 'done' and not by all
    # ids; otherwise it never completes when unfinished jobs get 'missing.val'
    pb = makeProgressBar(total = length(done), format = "Reducing [:bar] :percent eta: :eta")
    reader = RDSReader$new(TRUE)

    for (i in done) {
      res = worker(readRDS(fns[i]), makeJob(ids$job.id[i], reader = reader, reg = reg), ...)
      # assigning NULL via [[<- would drop the list element, so skip NULL results
      if (!is.null(res))
        results[[i]] = res
      rm(res)
      pb$tick()
    }
  }
  return(results)
}

================================================
FILE: R/removeExperiments.R
================================================
#' @title Remove Experiments
#'
#' @description
#' Remove Experiments from an \code{\link{ExperimentRegistry}}.
#' This function automatically checks if any of the jobs to remove is either pending or running.
#' However, if the implemented heuristic fails, this can lead to inconsistencies in the data base.
#' Use with care while jobs are running.
#'
#' @templateVar ids.default none
#' @template ids
#' @template expreg
#' @return [\code{\link[data.table]{data.table}}] of removed job ids, invisibly.
#' @export
#' @family Experiment
removeExperiments = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE, running.ok = FALSE)
  ids = convertIds(reg, ids, default = noIds())

  info("Removing %i Experiments ...", nrow(ids))
  # drop the experiments from the status table first ...
  reg$status = reg$status[!ids]

  # ... then discard all definitions no remaining job refers to
  orphan.rows = reg$defs[!reg$status, on = "def.id", which = TRUE]
  if (length(orphan.rows) > 0L) {
    info("Removing %i job definitions ...", length(orphan.rows))
    reg$defs = reg$defs[-orphan.rows]
  }

  # delete result files of the removed experiments and tidy up the registry
  file_remove(getResultFiles(reg, ids))
  sweepRegistry(reg)

  invisible(ids)
}

================================================
FILE: R/removeRegistry.R
================================================
#' @title Remove a Registry from the File System
#'
#' @description
#' Erases the complete file directory of the registry from the file system, including all results.
#' To remove intermediate files only, use \code{\link{sweepRegistry}} instead.
#'
#' @param wait [\code{numeric(1)}]\cr
#'   Number of seconds to wait before the deletion starts. Serves as a safety net
#'   against accidentally removing your precious files. Set to 0 in
#'   non-interactive scripts to disable this precaution.
#' @template reg
#'
#' @return [\code{character(1)}]: Path of the deleted file directory.
#' @export
#' @family Registry
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' removeRegistry(0, tmp)
removeRegistry = function(wait = 5, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE, sync = TRUE, running.ok = FALSE)
  assertNumber(wait, lower = 0)
  target = reg$file.dir

  # grace period which allows the user to abort
  if (wait > 0) {
    info("This deletes all files in '%s'. Proceeding in %g seconds ...", target, wait)
    Sys.sleep(wait)
  }

  # a removed registry must not remain the default registry
  is.default = identical(batchtools$default.registry$file.dir, target)
  if (is.default) {
    info("Unsetting registry as default")
    setDefaultRegistry(NULL)
  }

  info("Recursively removing files in '%s' ...", target)
  fs::dir_delete(target)
}

================================================
FILE: R/resetJobs.R
================================================
#' @title Reset the Computational State of Jobs
#'
#' @description
#' Resets the computational state of jobs in the \code{\link{Registry}}.
#' This function automatically checks if any of the jobs to reset is either pending or running.
#' However, if the implemented heuristic fails, this can lead to inconsistencies in the data base.
#' Use with care while jobs are running.
#'
#' @templateVar ids.default none
#' @template ids
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] of job ids which have been reset.
#'   See \code{\link{JoinTables}} for examples on working with job tables.
#' @family debug
#' @export
resetJobs = function(ids = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE, sync = TRUE, running.ok = FALSE)
  ids = convertIds(reg, ids, default = noIds())

  info("Resetting %i jobs in DB ...", nrow(ids))

  # status columns to blank out, paired with a missing value of the matching type
  reset.cols = c("submitted", "started", "done", "error", "mem.used", "resource.id", "batch.id", "log.file", "job.hash")
  reset.vals = list(NA_real_, NA_real_, NA_real_, NA_character_, NA_real_, NA_integer_, NA_character_, NA_character_, NA_character_)
  reg$status[ids, (reset.cols) := reset.vals, on = "job.id"]

  # results of reset jobs are obsolete
  file_remove(getResultFiles(reg, ids))
  sweepRegistry(reg)

  invisible(ids)
}

================================================
FILE: R/runOSCommand.R
================================================
#' @title Run OS Commands on Local or Remote Machines
#'
#' @description
#' This is a helper function to run arbitrary OS commands on local or remote machines.
#' The interface is similar to \code{\link[base]{system2}}, but it always returns the exit status
#' \emph{and} the output.
#'
#' @param sys.cmd [\code{character(1)}]\cr
#'   Command to run.
#' @param sys.args [\code{character}]\cr
#'   Arguments for \code{sys.cmd}.
#' @param stdin [\code{character(1)}]\cr
#'   Argument passed to \code{\link[base]{system2}}.
#' @param nodename [\code{character(1)}]\cr
#'   Name of the SSH node to run the command on. If set to \dQuote{localhost} (default), the command
#'   is not piped through SSH.
#' @return [\code{named list}] with \dQuote{sys.cmd}, \dQuote{sys.args}, \dQuote{exit.code} (integer), \dQuote{output} (character).
#' @export
#' @family ClusterFunctionsHelper
#' @examples
#' \dontrun{
#' runOSCommand("ls")
#' runOSCommand("ls", "-al")
#' runOSCommand("notfound")
#' }
runOSCommand = function(sys.cmd, sys.args = character(0L), stdin = "", nodename = "localhost") {
  assertCharacter(sys.cmd, any.missing = FALSE, len = 1L)
  assertCharacter(sys.args, any.missing = FALSE)
  assertString(nodename, min.chars = 1L)

  if (!isLocalHost(nodename)) {
    # remote execution: wrap the complete command line into a single ssh call
    command = sprintf("%s %s", sys.cmd, stri_flatten(sys.args, " "))
    if (getRversion() < "4.0.0") {
      command = shQuote(command)
    }
    command = stri_replace_all_fixed(command, "\\$", "$")
    sys.args = c("-q", nodename, command)
    sys.cmd = "ssh"
  }

  "!DEBUG [runOSCommand]: cmd: `sys.cmd` `stri_flatten(sys.args, ' ')`"

  if (nzchar(Sys.which(sys.cmd))) {
    res = suppressWarnings(system2(command = sys.cmd, args = sys.args, stdin = stdin, stdout = TRUE, stderr = TRUE, wait = TRUE))
    output = as.character(res)
    # system2() only sets the "status" attribute for non-zero exit codes
    exit.code = attr(res, "status") %??% 0L
  } else {
    # executable not found on the search path: report it like a shell would
    output = "command not found"
    exit.code = 127L
  }

  "!DEBUG [runOSCommand]: OS result (stdin '`stdin`', exit code `exit.code`):"
  "!DEBUG [runOSCommand]: `paste0(output, sep = '\n')`"

  return(list(sys.cmd = sys.cmd, sys.args = sys.args, exit.code = exit.code, output = output))
}

# TRUE if 'nodename' is NULL or one of the names/addresses of the local machine.
isLocalHost = function(nodename) {
  is.null(nodename) || nodename %chin% c("localhost", "127.0.0.1", "::1")
}

# Throws an error for a failed OS command. 'res' is expected to be a list as
# returned by runOSCommand(); missing elements are replaced by placeholders.
OSError = function(msg, res) {
  cmd = stri_flatten(c(res$sys.cmd, res$sys.args), collapse = " ") %??% NA_character_
  exit.code = res$exit.code %??% NA_integer_
  output = stri_flatten(res$output, "\n") %??% ""

  stopf("%s (exit code %i);\ncmd: '%s'\noutput:\n%s", msg, exit.code, cmd, output)
}

================================================
FILE: R/saveRegistry.R
================================================
#' @title Store the Registry to the File System
#'
#' @description
#' Stores the registry on the file system in its \dQuote{file.dir} (specified
#' for construction in \code{\link{makeRegistry}}, can be accessed via
#' \code{reg$file.dir}).
#' This function is usually called internally whenever needed.
#'
#' @template reg
#' @return [\code{logical(1)}]: \code{TRUE} if the registry was saved,
#'   \code{FALSE} otherwise (if the registry is read-only).
#' @family Registry
#' @export
saveRegistry = function(reg = getDefaultRegistry()) {
  if (!reg$writeable) {
    "!DEBUG [saveRegistry]: Skipping saveRegistry (read-only)"
    return(FALSE)
  }

  "!DEBUG [saveRegistry]: Saving Registry"
  reg$hash = rnd_hash()
  fn = fs::path(reg$file.dir, c("registry.new.rds", "registry.rds"))
  # copy everything except the excluded members into a fresh environment
  ee = new.env(parent = asNamespace("batchtools"))
  exclude = c("cluster.functions", "default.resources", "temp.dir", "mtime", "writeable")
  list2env(mget(chsetdiff(ls(reg), exclude), reg), ee)
  class(ee) = class(reg)
  # write to a temporary file first, then move it over the previous registry file
  writeRDS(ee, file = fn[1L], compress = reg$compress)
  fs::file_move(fn[1L], fn[2L])
  reg$mtime = file_mtime(fn[2L])
  return(TRUE)
}

================================================
FILE: R/sleep.R
================================================
# Returns a function(i) which sleeps before the i-th retry.
# 'sleep' may be NULL (fall back to reg$sleep, then to the built-in default),
# a non-negative number of seconds, or a function(i) returning seconds.
getSleepFunction = function(reg, sleep) {
  if (is.null(sleep)) {
    if (is.null(reg$sleep))
      return(function(i) { Sys.sleep(5 + 115 * pexp(i - 1, rate = 0.01)) })
    sleep = reg$sleep
  }

  if (is.numeric(sleep)) {
    assertNumber(sleep, lower = 0)
    return(function(i) Sys.sleep(sleep))
  }

  if (is.function(sleep)) {
    return(function(i) Sys.sleep(sleep(i)))
  }

  stop("Argument 'sleep' must be either a numeric value or function(i)")
}

================================================
FILE: R/submitJobs.R
================================================
#' @title Submit Jobs to the Batch Systems
#'
#' @description
#' Submits defined jobs to the batch system.
#'
#' After submitting the jobs, you can use \code{\link{waitForJobs}} to wait for the
#' termination of jobs or call \code{\link{reduceResultsList}}/\code{\link{reduceResults}}
#' to collect partial results.
#' The progress can be monitored with \code{\link{getStatus}}.
#'
#' @section Resources:
#' You can pass arbitrary resources to \code{submitJobs()} which then are available in the cluster function template.
#' Some resources' names are standardized and it is good practice to stick to the following nomenclature to avoid confusion:
#' \describe{
#'   \item{walltime:}{Upper time limit in seconds for jobs before they get killed by the scheduler. Can be passed as additional column as part of \code{ids} to set per-job resources.}
#'   \item{memory:}{Memory limit in Mb. If jobs exceed this limit, they are usually killed by the scheduler. Can be passed as additional column as part of \code{ids} to set per-job resources.}
#'   \item{ncpus:}{Number of (physical) CPUs to use on the slave. Can be passed as additional column as part of \code{ids} to set per-job resources.}
#'   \item{omp.threads:}{Number of threads to use via OpenMP. Used to set environment variable \dQuote{OMP_NUM_THREADS}. Can be passed as additional column as part of \code{ids} to set per-job resources.}
#'   \item{pp.size:}{Maximum size of the pointer protection stack, see \code{\link[base]{Memory}}.}
#'   \item{blas.threads:}{Number of threads to use for the BLAS backend. Used to set environment variables \dQuote{MKL_NUM_THREADS} and \dQuote{OPENBLAS_NUM_THREADS}. Can be passed as additional column as part of \code{ids} to set per-job resources.}
#'   \item{measure.memory:}{Enable memory measurement for jobs. Comes with a small runtime overhead.}
#'   \item{chunks.as.arrayjobs:}{Execute chunks as array jobs.}
#'   \item{pm.backend:}{Start a \pkg{parallelMap} backend on the slave.}
#'   \item{foreach.backend:}{Start a \pkg{foreach} backend on the slave.}
#'   \item{clusters:}{Resource used for Slurm to select the set of clusters to run \code{sbatch}/\code{squeue}/\code{scancel} on.}
#' }
#'
#' @section Chunking of Jobs:
#' Multiple jobs can be grouped (chunked) together to be executed sequentially on the batch system as a single batch job.
#' This is especially useful to avoid overburdening the scheduler by submitting thousands of jobs simultaneously.
#' To chunk jobs together, job ids must be provided as \code{data.frame} with columns \dQuote{job.id} and \dQuote{chunk} (integer).
#' All jobs with the same chunk number will be executed sequentially inside the same batch job.
#' The utility functions \code{\link{chunk}}, \code{\link{binpack}} and \code{\link{lpt}}
#' can assist in grouping jobs.
#'
#' @section Array Jobs:
#' If your cluster supports array jobs, you can set the resource \code{chunks.as.arrayjobs} to \code{TRUE} in order
#' to execute chunks as job arrays on the cluster.
#' For each chunk of size \code{n}, \pkg{batchtools} creates a \code{\link{JobCollection}} of (possibly heterogeneous) jobs which is
#' submitted to the scheduler as a single array job with \code{n} repetitions.
#' For each repetition, the \code{JobCollection} is first read from the file system, then subsetted to the \code{i}-th job using
#' the environment variable \code{reg$cluster.functions$array.var} (depending on the cluster backend, defined automatically) and finally
#' executed.
#'
#' @section Order of Submission:
#' Jobs are submitted in the order of chunks, i.e. jobs which have chunk number
#' \code{sort(unique(ids$chunk))[1]} first, then jobs with chunk number \code{sort(unique(ids$chunk))[2]}
#' and so on. If no chunks are provided, jobs are submitted in the order of \code{ids$job.id}.
#'
#' @section Limiting the Number of Jobs:
#' If requested, \code{submitJobs} tries to limit the number of concurrent jobs of the user by waiting until jobs terminate
#' before submitting new ones.
#' This can be controlled by setting \dQuote{max.concurrent.jobs} in the configuration file (see \code{\link{Registry}})
#' or by setting the resource \dQuote{max.concurrent.jobs} to the maximum number of jobs to run simultaneously.
#' If both are set, the setting via the resource takes precedence over the setting in the configuration.
#'
#' @section Measuring Memory:
#' Setting the resource \code{measure.memory} to \code{TRUE} turns on memory measurement:
#' \code{\link[base]{gc}} is called directly before and after the job and the difference is
#' stored in the internal database. Note that this is just a rough estimate and does
#' neither work reliably for external code like C/C++ nor in combination with threading.
#'
#' @section Inner Parallelization:
#' Inner parallelization is typically done via threading, sockets or MPI.
#' Two backends are supported to assist in setting up inner parallelization.
#'
#' The first package is \pkg{parallelMap}.
#' If you set the resource \dQuote{pm.backend} to \dQuote{multicore}, \dQuote{socket} or \dQuote{mpi},
#' \code{\link[parallelMap]{parallelStart}} is called on the slave before the first job in the chunk is started
#' and \code{\link[parallelMap]{parallelStop}} is called after the last job terminated.
#' This way, the resources for inner parallelization can be set and get automatically stored just like other computational resources.
#' The function provided by the user just has to call \code{\link[parallelMap]{parallelMap}} to start parallelization using the preconfigured backend.
#'
#' To control the number of CPUs, you have to set the resource \code{ncpus}.
#' Otherwise \code{ncpus} defaults to the number of available CPUs (as reported by \code{\link[parallel]{detectCores}})
#' on the executing machine for multicore and socket mode and defaults to the return value of \code{\link[Rmpi]{mpi.universe.size}-1} for MPI.
#' Your template must be set up to handle the parallelization, e.g. request the right number of CPUs or start R with \code{mpirun}.
#' You may pass further options like \code{level} to \code{\link[parallelMap]{parallelStart}} via the named list \dQuote{pm.opts}.
#'
#' The second supported parallelization backend is \pkg{foreach}.
#' If you set the resource \dQuote{foreach.backend} to \dQuote{seq} (sequential mode), \dQuote{parallel} (\pkg{doParallel}) or
#' \dQuote{mpi} (\pkg{doMPI}), the requested \pkg{foreach} backend is automatically registered on the slave.
#' Again, the resource \code{ncpus} is used to determine the number of CPUs.
#'
#' Neither the namespace of \pkg{parallelMap} nor the namespace of \pkg{foreach} are attached.
#' You have to do this manually via \code{\link[base]{library}} or let the registry load the packages for you.
#'
#' @note
#' If you have a large number of jobs, disabling the progress bar (\code{options(batchtools.progress = FALSE)})
#' can significantly increase the performance of \code{submitJobs}.
#'
#' @templateVar ids.default findNotSubmitted
#' @template ids
#' @param resources [\code{named list}]\cr
#'   Computational resources for the jobs to submit. The actual elements of this list
#'   (e.g. something like \dQuote{walltime} or \dQuote{nodes}) depend on your template file, exceptions are outlined in the section 'Resources'.
#'   Default settings for a system can be set in the configuration file by defining the named list \code{default.resources}.
#'   Note that these settings are merged by name, e.g. merging \code{list(walltime = 300)} into \code{list(walltime = 400, memory = 512)}
#'   will result in \code{list(walltime = 300, memory = 512)}.
#'   Same holds for individual job resources passed as additional column of \code{ids} (c.f. section 'Resources').
#' @param sleep [\code{function(i)} | \code{numeric(1)}]\cr
#'   Parameter to control the duration to sleep between temporary errors.
#'   You can pass an absolute numeric value in seconds or a \code{function(i)} which returns the number of seconds to sleep in the \code{i}-th
#'   iteration between temporary errors.
#'   If not provided (\code{NULL}), tries to read the value (number/function) from the configuration file (stored in \code{reg$sleep}) or defaults to
#'   a function with exponential backoff between 5 and 120 seconds.
#' @template reg
#' @return [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id} and \dQuote{chunk}.
#' @export
#' @examples
#' \dontshow{ batchtools:::example_push_temp(3) }
#' ### Example 1: Submit subsets of jobs
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#'
#' # toy function which fails if x is even and an input file does not exist
#' fun = function(x, fn) if (x %% 2 == 0 && !file.exists(fn)) stop("file not found") else x
#'
#' # define jobs via batchMap
#' fn = tempfile()
#' ids = batchMap(fun, 1:20, reg = tmp, fn = fn)
#'
#' # submit some jobs
#' ids = 1:10
#' submitJobs(ids, reg = tmp)
#' waitForJobs(ids, reg = tmp)
#' getStatus(reg = tmp)
#'
#' # create the required file and re-submit failed jobs
#' file.create(fn)
#' submitJobs(findErrors(ids, reg = tmp), reg = tmp)
#' getStatus(reg = tmp)
#'
#' # submit remaining jobs which have not yet been submitted
#' ids = findNotSubmitted(reg = tmp)
#' submitJobs(ids, reg = tmp)
#' getStatus(reg = tmp)
#'
#' # collect results
#' reduceResultsList(reg = tmp)
#'
#' ### Example 2: Using memory measurement
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#'
#' # Toy function which creates a large matrix and returns the column means
#' fun = function(n, p) colMeans(matrix(runif(n*p), n, p))
#'
#' # Arguments to fun:
#' args = data.table::CJ(n = c(1e4, 1e5), p = c(10, 50)) # like expand.grid()
#' print(args)
#'
#' # Map function to create jobs
#' ids = batchMap(fun, args = args, reg = tmp)
#'
#' # Set resources: enable memory measurement
#' res = list(measure.memory = TRUE)
#'
#' # Submit jobs using the currently configured cluster functions
#' submitJobs(ids, resources = res, reg = tmp)
#'
#' # Retrieve information about memory, combine with parameters
#' info = ijoin(getJobStatus(reg = tmp)[, .(job.id, mem.used)], getJobPars(reg = tmp))
#' print(unwrap(info))
#'
#' # Combine job info with results -> each job is aggregated using mean()
#' unwrap(ijoin(info, reduceResultsDataTable(fun = function(res) list(res = mean(res)), reg = tmp)))
#'
#' ### Example 3: Multicore execution on the slave
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#'
#' # Function which sleeps 10 seconds, i-times
#' f = function(i) {
#'   parallelMap::parallelMap(Sys.sleep, rep(10, i))
#' }
#'
#' # Create one job with parameter i=4
#' ids = batchMap(f, i = 4, reg = tmp)
#'
#' # Set resources: Use parallelMap in multicore mode with 4 CPUs
#' # batchtools internally loads the namespace of parallelMap and then
#' # calls parallelStart() before the job and parallelStop() right
#' # after the last job in the chunk terminated.
#' res = list(pm.backend = "multicore", ncpus = 4)
#'
#' \dontrun{
#' # Submit the job and wait for it
#' submitJobs(resources = res, reg = tmp)
#' waitForJobs(reg = tmp)
#'
#' # If successful, the running time should be ~10s
#' getJobTable(reg = tmp)[, .(job.id, time.running)]
#'
#' # There should also be a note in the log:
#' grepLogs(pattern = "parallelMap", reg = tmp)
#' }
submitJobs = function(ids = NULL, resources = list(), sleep = NULL, reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE, sync = TRUE)
  assertList(resources, names = "strict")
  # explicitly passed resources overrule the defaults of the registry
  resources = insert(reg$default.resources, resources)
  if (hasName(resources, "pm.backend"))
    assertChoice(resources$pm.backend, c("local", "multicore", "socket", "mpi"))
  if (hasName(resources, "foreach.backend"))
    assertChoice(resources$foreach.backend, c("seq", "parallel", "mpi"))
  if (hasName(resources, "pm.opts"))
    assertList(resources$pm.opts, names = "unique")
  if (hasName(resources, "ncpus"))
    assertCount(resources$ncpus, positive = TRUE)
  if (hasName(resources, "measure.memory"))
    assertFlag(resources$measure.memory)
  sleep = getSleepFunction(reg, sleep)
  ids = convertIds(reg, ids, default = .findNotSubmitted(reg = reg), keep.extra = c("chunk", batchtools$resources$per.job))
  if (nrow(ids) == 0L)
    return(noIds())

  # handle chunks
  use.chunking = hasName(ids, "chunk") && anyDuplicated(ids, by = "chunk") > 0L
  if (use.chunking) {
    ids$chunk = asInteger(ids$chunk, any.missing = FALSE)
    chunks = sort(unique(ids$chunk))
  } else {
    # no (real) chunking: each job gets its own chunk
    chunks = ids$chunk = seq_row(ids)
  }

  # check for jobs already on system
  on.sys = .findOnSystem(reg = reg, cols = c("job.id", "batch.id"))
  ids.on.sys = on.sys[ids, nomatch = 0L, on = "job.id"]
  if (nrow(ids.on.sys) > 0L)
    stopf("Some jobs are already on the system, e.g. %i", ids.on.sys[1L, ]$job.id)

  # handle max.concurrent.jobs
  max.concurrent.jobs = assertCount(resources$max.concurrent.jobs, null.ok = TRUE) %??%
    assertCount(reg$max.concurrent.jobs, null.ok = TRUE) %??% NA_integer_

  # handle chunks.as.arrayjobs
  chunks.as.arrayjobs = FALSE
  if (hasName(resources, "chunks.as.arrayjobs")) {
    assertFlag(resources$chunks.as.arrayjobs)
    if (resources$chunks.as.arrayjobs) {
      if (is.na(reg$cluster.functions$array.var)) {
        info("Ignoring resource 'chunks.as.arrayjobs', not supported by cluster functions '%s'", reg$cluster.functions$name)
      } else {
        chunks.as.arrayjobs = TRUE
      }
    }
  }

  if (!is.na(max.concurrent.jobs)) {
    # only throttle if this submission could actually exceed the limit
    if (uniqueN(on.sys, by = "batch.id") + (!chunks.as.arrayjobs) * length(chunks) + chunks.as.arrayjobs * nrow(ids) > max.concurrent.jobs) {
      "!DEBUG [submitJobs]: Limiting the number of concurrent jobs to `max.concurrent.jobs`"
    } else {
      max.concurrent.jobs = NA_integer_
    }
  }

  # handle job resources
  per.job.resources = chintersect(names(ids), batchtools$resources$per.job)
  if (length(per.job.resources) > 0L) {
    if (use.chunking)
      stopf("Combining per-job resources with chunking is not supported")
    ids$resource.id = addResources(reg, .mapply(function(...) insert(resources, list(...)), ids[, per.job.resources, with = FALSE], MoreArgs = list()))
    ids[, (per.job.resources) := NULL]
  } else {
    ids$resource.id = addResources(reg, list(resources))
  }

  info("Submitting %i jobs in %i chunks using cluster functions '%s' ...", nrow(ids), length(chunks), reg$cluster.functions$name)
  # persist the registry even if the submission aborts half-way
  on.exit(saveRegistry(reg))

  chunk = NULL
  runHook(reg, "pre.submit")

  pb = makeProgressBar(total = length(chunks), format = ":status [:bar] :percent eta: :eta")
  pb$tick(0, tokens = list(status = "Submitting"))

  for (ch in chunks) {
    ids.chunk = ids[chunk == ch, c("job.id", "resource.id")]
    jc = makeJobCollection(ids.chunk, resources = reg$resources[ids.chunk, on = "resource.id"]$resources[[1L]], reg = reg)
    if (reg$cluster.functions$store.job.collection)
      writeRDS(jc, file = jc$uri, compress = jc$compress)

    # do we have to wait for jobs to get terminated before proceeding?
    if (!is.na(max.concurrent.jobs)) {
      # count chunks or job.id
      i = 1L
      repeat {
        n.on.sys = uniqueN(getBatchIds(reg), by = "batch.id")
        "!DEBUG [submitJobs]: Detected `n.on.sys` batch jobs on system (`max.concurrent.jobs` allowed concurrently)"

        if (n.on.sys < max.concurrent.jobs)
          break
        pb$tick(0, tokens = list(status = "Waiting   "))
        sleep(i)
        i = i + 1L
      }
    }

    # remove old result files
    fns = getResultFiles(reg, ids.chunk)
    file_remove(fns)

    i = 1L
    repeat {
      runHook(reg, "pre.submit.job")
      now = ustamp()
      submit = reg$cluster.functions$submitJob(reg = reg, jc = jc)

      if (submit$status == 0L) {
        if (!testCharacter(submit$batch.id, any.missing = FALSE, min.len = 1L)) {
          stopf("Cluster function did not return valid batch ids:\n%s", stri_flatten(capture.output(str(submit$batch.id)), "\n"))
        }
        reg$status[ids.chunk,
          c("submitted", "started", "done", "error", "mem.used", "resource.id", "batch.id", "log.file", "job.hash") :=
          list(now, NA_real_, NA_real_, NA_character_, NA_real_, ids.chunk$resource.id, submit$batch.id, submit$log.file, jc$job.hash)]
        runHook(reg, "post.submit.job")
        break
      } else if (submit$status >= 1L && submit$status <= 100L) {
        # temp error: report the message, back off and retry
        # NOTE(review): the boundary value 100 is handled as temporary error
        # here -- confirm against the status convention of the cluster functions
        pb$tick(0, tokens = list(status = submit$msg))
        sleep(i)
        i = i + 1L
      } else {
        # fatal error; this also catches status codes outside of the expected
        # range, which previously matched no branch and were resubmitted in a
        # tight loop without any sleep
        stopf("Fatal error occurred: %i. %s", submit$status, submit$msg)
      }
    }
    pb$tick(len = 1, tokens = list(status = "Submitting"))
  }

  Sys.sleep(reg$cluster.functions$scheduler.latency)
  runHook(reg, "post.submit")

  # return ids, registry is saved via on.exit()
  return(invisible(ids))
}

# Registers resource specifications in reg$resources (identified via hash, new
# ones get a fresh resource.id) and returns the resource ids corresponding to
# the elements of 'resources'.
addResources = function(reg, resources) {
  ai = function(tab, col) { # auto increment by reference
    i = tab[is.na(get(col)), which = TRUE]
    if (length(i) > 0L) {
      ids = seq_along(i)
      # continue counting after the largest id already present
      if (length(i) < nrow(tab))
        ids = ids + max(tab[, max(col, na.rm = TRUE), with = FALSE][[1L]], na.rm = TRUE)
      tab[i, (col) := ids]
      setkeyv(tab, col)[]
    }
  }

  tab = data.table(resources = resources, resource.hash = vcapply(resources, digest))
  # anti join: keep only specifications which are not yet known to the registry
  new.tab = unique(tab, by = "resource.hash")[!reg$resources, on = "resource.hash"]
  if (nrow(new.tab)) {
    reg$resources = rbindlist(list(reg$resources, new.tab), fill = TRUE, use.names = TRUE)
    ai(reg$resources, "resource.id")
  }
  reg$resources[tab, "resource.id", on = "resource.hash"][[1L]]
}

================================================
FILE: R/summarizeExperiments.R
================================================
#' @title Quick Summary over Experiments
#'
#' @description
#' Returns a frequency table of defined experiments.
#' See \code{\link{ExperimentRegistry}} for an example.
#'
#' @templateVar ids.default all
#' @template ids
#' @param by [\code{character}]\cr
#'   Split the resulting table by columns of \code{\link{getJobPars}}.
#' @template expreg
#' @return [\code{\link[data.table]{data.table}}] of frequencies.
#' @export
#' @family Experiment
summarizeExperiments = function(ids = NULL, by = c("problem", "algorithm"), reg = getDefaultRegistry()) {
  assertRegistry(reg, class = "ExperimentRegistry")
  assertCharacter(by, any.missing = FALSE, min.chars = 1L, min.len = 1L, unique = TRUE)

  pars = getJobPars(ids = ids, reg = reg)
  # the parameters are unwrapped unless 'by' is exactly problem and algorithm
  default.grouping = setequal(by, c("problem", "algorithm"))
  if (!default.grouping) {
    pars = unwrap(pars)
  }
  pars[, list(.count = .N), by = by]
}

================================================
FILE: R/sweepRegistry.R
================================================
#' @title Check Consistency and Remove Obsolete Information
#'
#' @description
#' Stray files may be left behind by canceled jobs and by jobs which have been submitted multiple times.
#' This function checks the registry for consistency and removes obsolete files
#' as well as redundant data base entries.
#'
#' @template reg
#' @family Registry
#' @export
sweepRegistry = function(reg = getDefaultRegistry()) {
  assertRegistry(reg, writeable = TRUE, sync = TRUE, running.ok = FALSE)
  "!DEBUG [sweepRegistry]: Running sweepRegistry"

  # report (using the message template 'msg') and delete 'paths', if there are any
  purge = function(paths, msg, delete = fs::file_delete) {
    if (length(paths)) {
      info(msg, length(paths))
      delete(paths)
    }
  }

  submitted = reg$status[.findSubmitted(reg = reg), c("job.id", "job.hash")]

  # result files which do not belong to a submitted job
  purge(
    chsetdiff(list.files(dir(reg, "results"), full.names = TRUE), getResultFiles(reg, submitted)),
    "Removing %i obsolete result files ..."
  )

  # log files which do not belong to a submitted job
  purge(
    chsetdiff(list.files(dir(reg, "logs"), full.names = TRUE), getLogFiles(reg, submitted)),
    "Removing %i obsolete log files ..."
  )

  # stored job collections
  purge(
    list.files(dir(reg, "jobs"), pattern = "\\.rds", full.names = TRUE),
    "Removing %i obsolete job collection files ..."
  )

  # job description files
  purge(
    list.files(dir(reg, "jobs"), pattern = "\\.job$", full.names = TRUE),
    "Removing %i job description files ..."
  )

  # external directories which do not belong to a submitted job
  purge(
    chsetdiff(list.files(dir(reg, "external"), pattern = "^[0-9]+$", full.names = TRUE), getExternalDirs(reg, submitted)),
    "Removing %i external directories of unsubmitted jobs ...",
    delete = fs::dir_delete
  )

  # resource specifications not referenced in the status table
  stale = reg$resources[!reg$status, on = "resource.id", which = TRUE]
  if (length(stale)) {
    info("Removing %i resource specifications ...", length(stale))
    reg$resources = reg$resources[-stale]
  }

  # tags of jobs which are not in the status table
  stale = reg$tags[!reg$status, on = "job.id", which = TRUE]
  if (length(stale)) {
    info("Removing %i tags ...", length(stale))
    reg$tags = reg$tags[-stale]
  }

  saveRegistry(reg)
}

================================================
FILE: R/syncRegistry.R
================================================
#' @title Synchronize the Registry
#'
#' @description
#' Parses update files written by the slaves to the file system and updates the
#' internal data base.
#'
#' @template reg
#' @return [\code{logical(1)}]: \code{TRUE} if the state has changed, \code{FALSE} otherwise.
#' @family Registry
#' @export
syncRegistry = function(reg = getDefaultRegistry()) {
  assertRegistry(reg)

  merged.files = sync(reg)
  changed = length(merged.files) > 0L
  if (changed) {
    # persist the merged state first, then drop the consumed update files
    saveRegistry(reg)
    file_remove(merged.files)
  }
  changed
}


# Merges all readable update files into reg$status (by reference).
# Returns the paths of the update files which have been merged and may be removed
# by the caller; always character(0) for registries which are not writeable.
sync = function(reg) {
  "!DEBUG [syncRegistry]: Triggered syncRegistry"
  update.files = list.files(dir(reg, "updates"), full.names = TRUE)
  if (length(update.files) == 0L)
    return(character())

  runHook(reg, "pre.sync", fns = update.files)

  # Reads a single update file, returns NULL if the file cannot be read.
  # Unreadable files older than 60 minutes are removed if the registry is writeable.
  readUpdate = function(fn) {
    content = try(readRDS(fn), silent = TRUE)
    if (!is.error(content))
      return(content)

    outdated = reg$writeable && difftime(Sys.time(), fs::file_info(fn)$modification_time, units = "mins") > 60
    if (outdated) {
      info("Removing unreadable update file '%s'", fn)
      file_remove(fn)
    } else {
      info("Skipping unreadable update file '%s'", fn)
    }
    NULL
  }

  parsed = lapply(update.files, readUpdate)
  unreadable = vlapply(parsed, is.null)
  merged = rbindlist(parsed, fill = TRUE, use.names = TRUE) # -> fill = TRUE for #135

  if (nrow(merged) > 0L) {
    # update join: overwrite the status columns with the values reported in the update files
    assignment = quote(`:=`(started = i.started, done = i.done, error = i.error, mem.used = i.mem.used))
    reg$status[merged, eval(assignment), on = "job.id"]
  }

  runHook(reg, "post.sync", updates = merged)

  if (!reg$writeable)
    return(character())
  update.files[!unreadable]
}


================================================
FILE: R/testJob.R
================================================
#' @title Run Jobs Interactively
#'
#' @description
#' Starts a single job on the local machine.
#'
#' @template id
#' @param external [\code{logical(1)}]\cr
#'   Run the job in an external R session? If \code{TRUE}, starts a fresh R
#'   session on the local machine to execute the job with \code{\link{execJob}}.
#'   You will not be able to use debug tools like \code{\link[base]{traceback}}
#'   or \code{\link[base]{browser}}.
#'
#'   If \code{external} is set to \code{FALSE} (default) on the other hand,
#'   \code{testJob} will execute the job in the current R session and the usual
#'   debugging tools work. However, spotting missing variable declarations (as they
#'   are possibly resolved in the global environment) is impossible.
#'   Same holds for missing package dependency declarations.
#'
#' @template reg
#' @return Returns the result of the job if successful.
#' @export
#' @family debug
#' @examples
#' \dontshow{ batchtools:::example_push_temp(1) }
#' tmp = makeRegistry(file.dir = NA, make.default = FALSE)
#' batchMap(function(x) if (x == 2) xxx else x, 1:2, reg = tmp)
#' testJob(1, reg = tmp)
#' \dontrun{
#' testJob(2, reg = tmp)
#' }
testJob = function(id, external = FALSE, reg = getDefaultRegistry()) {
  assertRegistry(reg)
  assertFlag(external)
  id = convertId(reg, id)

  if (!external) {
    # run in the current session, inside the working directory of the registry
    return(with_dir(reg$work.dir, {
      loadRegistryDependencies(reg, must.work = TRUE)
      execJob(job = makeJob(id, reg = reg))
    }))
  }

  # external mode: all intermediate files live in the temp dir of this session
  tmp.dir = fs::path_abs(fs::path_temp())
  script.file = fs::path(tmp.dir, "batchtools-testJob.R")
  collection.file = fs::path(tmp.dir, "batchtools-testJob.jc")
  result.file = fs::path(tmp.dir, "batchtools-testJob.rds")

  # store the job collection and brew the R script from the shipped template
  writeRDS(makeJobCollection(id, reg = reg), file = collection.file, compress = reg$compress)
  template = system.file(fs::path("templates", "testJob.tmpl"), package = "batchtools", mustWork = TRUE)
  brew(file = template, output = script.file, envir = list2env(list(jc = collection.file, result = result.file)))

  # remove the result file of a previous call before starting the script
  file_remove(result.file)
  run = runOSCommand(Rscript(), script.file)
  writeLines(run$output)

  succeeded = run$exit.code == 0L && file.exists(result.file)
  if (!succeeded)
    stopf("testJob() failed for job with id=%i. To properly debug, re-run with external=FALSE", id$job.id)
  readRDS(result.file)
}


================================================
FILE: R/unwrap.R
================================================
#' @title Unwrap Nested Data Frames
#'
#' @description
#' Some functions (e.g., \code{\link{getJobPars}}, \code{\link{getJobResources}} or \code{\link{reduceResultsDataTable}})
#' return a \code{data.table} with columns of type \code{list}.
#' These columns can be unnested/unwrapped with this function.
#' The contents of these columns will be transformed to a \code{data.table} and \code{\link[base]{cbind}}-ed
#' to the input data.frame \code{x}, replacing the original nested column.
#'
#' @note
#' There is a name clash with function \code{flatten} in package \pkg{purrr}.
#' The function \code{flatten} is discouraged to use for this reason in favor of \code{unwrap}.
#'
#' @param x [\code{\link{data.frame}} | \code{\link[data.table]{data.table}}]\cr
#'   Data frame to flatten.
#' @param cols [\code{character}]\cr
#'   Columns to consider for this operation. If set to \code{NULL} (default),
#'   will operate on all columns of type \dQuote{list}.
#' @param sep [\code{character(1)}]\cr
#'   If \code{NULL} (default), the column names of the additional columns will re-use the names
#'   of the nested \code{list}/\code{data.frame}.
#'   This may lead to name clashes.
#'   If you provide \code{sep}, the variable column name will be constructed as
#'   \dQuote{[column name of x][sep][inner name]}.
#' @return [\code{\link[data.table]{data.table}}].
#' @export
#' @examples
#' x = data.table::data.table(
#'   id = 1:3,
#'   values = list(list(a = 1, b = 3), list(a = 2, b = 2), list(a = 3))
#' )
#' unwrap(x)
#' unwrap(x, sep = ".")
unwrap = function(x, cols = NULL, sep = NULL) {
  assertDataFrame(x)
  if (!is.data.table(x))
    x = as.data.table(x)

  if (is.null(cols)) {
    # default: operate on all list columns
    cols = names(x)[vlapply(x, is.list)]
  } else {
    assertNames(cols, "unique", subset.of = names(x))
    qassertr(x[, cols, with = FALSE], "l")
  }
  assertString(sep, null.ok = TRUE)

  # the result is assembled around a temporary row index '.row' which is used as merge key
  res = data.table(.row = seq_row(x), key = ".row")
  extra.cols = chsetdiff(names(x), cols)
  if (length(extra.cols))
    res = cbind(res, x[, extra.cols, with = FALSE])

  for (col in cols) {
    xc = x[[col]]

    new.cols = lapply(xc, function(x) {
      if (!is.null(x)) {
        # elements which fail the qtest rules "l", "d" and "v1" are wrapped
        # into a list so that they occupy a single cell
        ii = !vlapply(x, qtest, c("l", "d", "v1")) # FIXME: add parameter `which` to qtestr
        x[ii] = lapply(x[ii], list)
        # unnamed elements get generated names "<col>.<index>"
        na = which(is.na(names2(x)))
        if (length(na) > 0L)
          names(x)[na] = sprintf("%s.%i", col, seq_along(na))
      }
      x
    })
    new.cols = rbindlist(new.cols, fill = TRUE, idcol = ".row", use.names = TRUE)

    # more than one column means that there is something besides the id column '.row'
    if (ncol(new.cols) > 1L) {
      if (nrow(new.cols) > nrow(x) || anyDuplicated(new.cols, by = ".row") > 0L)
        stopf("Some rows are unsuitable for unnesting. Unwrapping row in column '%s' leads to multiple rows", col)
      if (!is.null(sep)) {
        nn = setdiff(names(new.cols), ".row")
        setnames(new.cols, nn, stri_paste(col, nn, sep = sep))
      }
      clash = chsetdiff(chintersect(names(res), names(new.cols)), ".row")
      if (length(clash) > 0L)
        stopf("Name clash while unwrapping data.table: Duplicated column names: %s", stri_flatten(clash, ", "))
      res = merge(res, new.cols, all.x = TRUE, by = ".row")
    }
  }

  res[, ".row" := NULL]
  # restore the key of the input if all key columns are still present
  kx = key(x)
  if (!is.null(kx) && all(kx %chin% names(res)))
    setkeyv(res, kx)
  res[]
}

#' @rdname unwrap
#' @export
flatten = function(x, cols = NULL, sep = NULL) { #nocov start
  "!DEBUG Call of soon-to-be deprecated function flatten. Use unwrap() instead!"
  unwrap(x, cols, sep)
} #nocov end


================================================
FILE: R/updateRegisty.R
================================================
# Migrates a registry created with an older version of batchtools to the layout
# of the installed version. The migration steps are guarded by version checks and
# applied in ascending order; finally reg$version is set to the installed version.
# Stops if the registry has no version or a version older than 0.9.0.
#
# returns TRUE if the state possibly changed
updateRegistry = function(reg = getDefaultRegistry()) { # nocov start
  "!DEBUG [updateRegistry]: Running updateRegistry"
  pv = packageVersion("batchtools")
  if (identical(pv, reg$version))
    return(FALSE)

  if (is.null(reg$version) || reg$version < "0.9.0")
    stop("Your registry is too old.")

  if (reg$version < "0.9.1-9000") {
    ### hotfix for timestamps
    if (is.integer(reg$status$submitted)) {
      info("Converting timestamps to numeric")
      for (x in c("submitted", "started", "done"))
        reg$status[[x]] = as.numeric(reg$status[[x]])
    }

    ### hotfix for log.file column
    if ("log.file" %chnin% names(reg$status)) {
      info("Adding column 'log.file'")
      reg$status[, ("log.file") := rep(NA_character_, .N)]
    }
  }

  if (reg$version < "0.9.1-9001") {
    ### hotfix for base32 encoding of exports
    export.dir = fs::path(reg$file.dir, "exports")
    fns = list.files(export.dir, pattern = "\\.rds$", all.files = TRUE, no.. = TRUE)
    if (length(fns)) {
      info("Renaming export files")
      # list.files() returns names relative to the export directory, so both paths
      # must be built relative to it; previously the "exports" component was missing.
      # NOTE(review): assumes the renamed files are expected in the same directory -- confirm
      fs::file_move(
        fs::path(export.dir, fns),
        fs::path(export.dir, mangle(stri_sub(fns, to = -5L)))
      )
    }
  }

  if (reg$version < "0.9.1-9002" && inherits(reg, "ExperimentRegistry")) {
    info("Renaming problems and algorithm files")
    getProblemIds = function(reg) levels(reg$defs$problem)
    getAlgorithmIds = function(reg) levels(reg$defs$algorithm)

    # files were named after the digest of the id before, move them to the current URI
    for (prob in getProblemIds(reg))
      fs::file_move(fs::path(reg$file.dir, "problems", sprintf("%s.rds", digest(prob))), getProblemURI(reg, prob))
    for (algo in getAlgorithmIds(reg))
      fs::file_move(fs::path(reg$file.dir, "algorithms", sprintf("%s.rds", digest(algo))), getAlgorithmURI(reg, algo))
  }

  if (reg$version < "0.9.4-9001") {
    if ("job.name" %chnin% names(reg$status)) {
      info("Adding column 'job.name'")
      reg$status[, ("job.name") := rep(NA_character_, .N)]
    }
  }

  if (reg$version < "0.9.6-9001") {
    info("Updating registry internals")
    if (!inherits(reg, "ExperimentRegistry")) {
      setnames(reg$defs, "pars", "job.pars")
    } else {
      alloc.col(reg$defs, ncol(reg$defs) + 1L)
      # problem and algorithm ids are stored in the registry, the columns become character
      reg$problems = levels(reg$defs$problem)
      reg$algorithms = levels(reg$defs$algorithm)
      reg$defs$problem = as.character(reg$defs$problem)
      reg$defs$algorithm = as.character(reg$defs$algorithm)
      # split the combined parameter column into separate problem and algorithm parameters
      reg$defs$prob.pars = lapply(reg$defs$pars, `[[`, "prob.pars")
      reg$defs$algo.pars = lapply(reg$defs$pars, `[[`, "algo.pars")
      reg$defs$pars = NULL
      info("Recalculating job hashes")
      reg$defs$pars.hash = calculateHash(reg$defs)
    }
  }

  if (reg$version < "0.9.7-9001") {
    if (inherits(reg, "ExperimentRegistry")) {
      info("Updating problems")
      for (id in reg$problems) {
        uri = getProblemURI(reg, id)
        p = readRDS(uri)
        p$cache = FALSE
        saveRDS(p, file = uri, version = 2L)
      }
    }
  }

  if (reg$version < "0.9.7-9002") {
    if (hasName(reg$status, "memory")) {
      info("Renaming memory column in data base")
      setnames(reg$status, "memory", "mem.used")
    }

    fns = list.files(dir(reg, "updates"), full.names = TRUE)
    if (length(fns) > 0L) {
      info("Renaming memory column in update files")
      updates = lapply(fns, function(fn) {
        x = try(readRDS(fn), silent = TRUE)
        if (is.error(x)) {
          # delete the unreadable file itself; the previous code passed the
          # try-error object `x` instead of the path `fn` to file_delete()
          fs::file_delete(fn)
        } else {
          if (hasName(x, "memory")) {
            setnames(x, "memory", "mem.used")
            saveRDS(x, file = fn, version = 2L)
          }
        }
      })
    }
  }

  if (is.null(reg$compress)) {
    reg$compress = "gzip"
  }

  reg$version = pv
  return(TRUE)
} # nocov end


================================================
FILE: R/waitForFiles.R
================================================
# Waits until all files `fns` are listed in directory `path`.
# Returns TRUE immediately if `timeout` is 0 and as soon as no file is missing,
# raises an error if files are still missing after `timeout` seconds.
# The first check uses fs::file_exists() on `fns` as passed, all following checks
# compare `fns` against the directory listing of `path`.
#
# use list.files() here as this seems to trick the nfs cache
# see https://github.com/mlr-org/batchtools/issues/85
waitForFiles = function(path, fns, timeout = 0) {
  if (timeout == 0)
    return(TRUE)

  fns = fns[!fs::file_exists(fns)]
  if (length(fns) == 0L)
    return(TRUE)

  "!DEBUG [waitForFiles]: `length(fns)` files not found via 'file.exists()'"

  fns = chsetdiff(fns, list.files(path, all.files = TRUE))
  if (length(fns) == 0L)
    return(TRUE)

  # from here on, `timeout` is the absolute point in time to give up
  timeout = timeout + Sys.time()
  repeat {
    Sys.sleep(0.5)
    fns = chsetdiff(fns, list.files(path, all.files = TRUE))
    if (length(fns) == 0L)
      return(TRUE)
    if (Sys.time() > timeout)
      stopf("Timeout while waiting for %i files, e.g. '%s'", length(fns), fns[1L])
  }
}

# Waits until the single file `fn` shows up in the listing of its directory.
# Returns TRUE if `timeout` is 0 or the file is found. After `timeout` seconds,
# an error is raised if `must.work` is TRUE, otherwise FALSE is returned.
waitForFile = function(fn, timeout = 0, must.work = TRUE) {
  if (timeout == 0 || fs::file_exists(fn))
    return(TRUE)
  "!DEBUG [waitForFile]: `fn` not found via 'file.exists()'"
  # from here on, `timeout` is the absolute point in time to give up
  timeout = timeout + Sys.time()
  path = fs::path_dir(fn)
  repeat {
    Sys.sleep(0.5)
    if (basename(fn) %chin% list.files(path, all.files = TRUE))
      return(TRUE)
    if (Sys.time() > timeout) {
      if (must.work)
        stopf("Timeout while waiting for file '%s'", fn)
      return(FALSE)
    }
  }
}


================================================
FILE: R/waitForJobs.R
================================================
#' @title Wait for Termination of Jobs
#'
#' @description
#' This function simply waits until all jobs are terminated.
#'
#' @templateVar ids.default findSubmitted
#' @template ids
#' @param sleep [\code{function(i)} | \code{numeric(1)}]\cr
#'   Parameter to control the duration to sleep between queries.
#'   You can pass an absolute numeric value in seconds or a \code{function(i)} which returns
#'   the number of seconds to sleep in the \code{i}-th iteration.
#'   If not provided (\code{NULL}), tries to read the value (number/function) from the configuration file
#'   (stored in \code{reg$sleep}) or defaults to a function with exponential backoff between
#'   5 and 120 seconds.
#' @param timeout [\code{numeric(1)}]\cr
#'   After waiting \code{timeout} seconds, show a message and return
#'   \code{FALSE}. This argument may be required on some systems where, e.g.,
#'   expired jobs or jobs on hold are problematic to detect. If you don't want
#'   a timeout, set this to \code{Inf}. Default is \code{604800} (one week).
#' @param expire.after [\code{integer(1)}]\cr
#'   Jobs count as \dQuote{expired} if they are not found on the system but have not communicated back
#'   their results (or error message). This frequently happens on managed systems if the scheduler kills
#'   a job because the job has hit the walltime or requested more memory than reserved.
#'   On the other hand, network file systems often require several seconds for new files to be found,
#'   which can lead to false positives in the detection heuristic.
#'   \code{waitForJobs} treats such jobs as expired after they have not been detected on the system
#'   for \code{expire.after} iterations.
#'   If not provided (\code{NULL}), tries to read the value from the configuration file (stored in \code{reg$expire.after}),
#'   and finally defaults to \code{3}.
#' @param stop.on.error [\code{logical(1)}]\cr
#'   Immediately cancel if a job terminates with an error? Default is
#'   \code{FALSE}.
#' @param stop.on.expire [\code{logical(1)}]\cr
#'   Immediately cancel if jobs are detected to be expired? Default is \code{FALSE}.
#'   Expired jobs will then be ignored for the remainder of \code{waitForJobs()}.
#' @template reg
#' @return [\code{logical(1)}]. Returns \code{TRUE} if all jobs terminated
#'   successfully and \code{FALSE} if either the timeout is reached or at least
#'   one job terminated with an exception or expired.
#' @export
waitForJobs = function(ids = NULL, sleep = NULL, timeout = 604800, expire.after = NULL, stop.on.error = FALSE, stop.on.expire = FALSE, reg = getDefaultRegistry()) {
  # waits for the result files of all jobs marked as done, using the
  # file system latency configured in the cluster functions as timeout
  waitForResults = function(ids) {
    waitForFiles(
      fs::path(reg$file.dir, "results"),
      sprintf("%i.rds", .findDone(reg, ids)$job.id),
      reg$cluster.functions$fs.latency
    )
  }

  assertRegistry(reg, sync = TRUE)
  assertNumber(timeout, lower = 0)
  assertFlag(stop.on.error)
  assertFlag(stop.on.expire)
  # precedence: argument, then reg$expire.after, then 3
  expire.after = assertCount(expire.after, positive = TRUE, null.ok = TRUE) %??% reg$expire.after %??% 3L
  sleep = getSleepFunction(reg, sleep)
  ids = convertIds(reg, ids, default = .findSubmitted(reg = reg))

  if (nrow(.findNotSubmitted(ids = ids, reg = reg)) > 0L) {
    warning("Cannot wait for unsubmitted jobs. Removing from ids.")
    ids = ids[.findSubmitted(ids = ids, reg = reg), nomatch = 0L]
  }

  if (nrow(ids) == 0L) {
    return(TRUE)
  }

  # these names are used as column names inside the data.table expressions below
  terminated = on.sys = expire.counter = NULL
  # bookkeeping columns, updated by reference in each iteration of the loop
  ids$terminated = FALSE
  ids$on.sys = FALSE
  ids$expire.counter = 0L

  # from here on, `timeout` is the absolute point in time to give up
  timeout = Sys.time() + timeout
  pb = makeProgressBar(total = nrow(ids), format = "Waiting (Q::queued R::running D::done E::error ?::expired) [:bar] :percent eta: :eta")
  i = 0L

  repeat {
    ### case 1: all jobs terminated or expired -> nothing on system
    ids[.findTerminated(reg, ids), "terminated" := TRUE]
    if (ids[!(terminated) & expire.counter <= expire.after, .N] == 0L) {
      "!DEBUG [waitForJobs]: All jobs terminated"
      pb$update(1)
      waitForResults(ids)
      # TRUE only if every job we waited for is done
      return(nrow(.findDone(reg, ids)) == nrow(ids))
    }

    ### case 2: there are errors and stop.on.error is TRUE
    if (stop.on.error && nrow(.findErrors(reg, ids)) > 0L) {
      "!DEBUG [waitForJobs]: Errors found and stop.on.error is TRUE"
      pb$update(1)
      return(FALSE)
    }

    batch.ids = getBatchIds(reg)
    # reset the flag for all jobs, then set it for the jobs currently found on the system
    ids[, "on.sys" := FALSE][.findOnSystem(reg, ids, batch.ids = batch.ids), "on.sys" := TRUE]
    # the counter holds the number of consecutive iterations a non-terminated job was not found
    ids[(on.sys), "expire.counter" := 0L]
    ids[!(on.sys) & !(terminated), "expire.counter" := expire.counter + 1L]

    stats = getStatusTable(ids = ids, batch.ids = batch.ids, reg = reg)
    pb$update(mean(ids$terminated), tokens = as.list(stats))
    "!DEBUG [waitForJobs]: batch.ids: `stri_flatten(batch.ids$batch.id, ',')`"

    ### case 3: jobs disappeared, we cannot find them on the system after [expire.after] iterations
    if (stop.on.expire && ids[!(terminated) & expire.counter > expire.after, .N] > 0L) {
      warning("Jobs disappeared from the system")
      pb$update(1)
      waitForResults(ids)
      return(FALSE)
    }

    # case 4: we reach a timeout
    sleep(i)
    i = i + 1L
    if (Sys.time() > timeout) {
      pb$update(1)
      warning("Timeout reached")
      return(FALSE)
    }

    # merge new update files (messages suppressed) so that the next iteration sees the current state
    merged = suppressMessages(sync(reg = reg))
    if (length(merged)) {
      saveRegistry(reg)
      file_remove(merged)
    }
  }
}


================================================
FILE: R/zzz.R
================================================
#' @description
#' For bug reports and feature requests please use the tracker:
#' \url{https://github.com/mlr-org/batchtools}.
#'
#' @section Package options:
#' \describe{
#'   \item{\code{batchtools.verbose}}{
#'     Verbosity. Set to \code{FALSE} to suppress info messages and progress bars.
#'   }
#'   \item{\code{batchtools.progress}}{
#'     Progress bars. Set to \code{FALSE} to disable them.
#'   }
#'   \item{\code{batchtools.timestamps}}{
#'     Add time stamps to log output. Set to \code{FALSE} to disable them.
#'   }
#' }
#' Furthermore, you may enable a debug mode using the \pkg{debugme} package by
#' setting the environment variable \dQuote{DEBUGME} to \dQuote{batchtools} before
#' loading \pkg{batchtools}.
#' @import utils
#' @import checkmate
#' @import stringi
#' @import data.table
#' @importFrom R6 R6Class
#' @importFrom digest digest
#' @importFrom brew brew
#' @importFrom progress progress_bar
#' @importFrom rappdirs user_config_dir site_config_dir
#' @importFrom stats runif predict pexp
#' @importFrom base64url base32_encode base32_decode
#' @importFrom withr with_dir with_seed local_options local_dir
"_PACKAGE"

# package-level environment holding internal state and constants
batchtools = new.env(parent = emptyenv())
# set to TRUE in .onLoad() if the debug mode is enabled
batchtools$debug = FALSE
# names of the known hooks, grouped into "remote" and "local"
batchtools$hooks = list(
  remote = c("pre.do.collection", "post.do.collection"),
  local = c("pre.sync", "post.sync", "pre.submit.job", "post.submit.job", "pre.submit", "post.submit", "pre.kill", "post.kill")
)

# names of the known resources, grouped into "per.job" and "per.chunk"
batchtools$resources = list(
  per.job = c("walltime", "memory", "ncpus", "omp.threads", "blas.threads"),
  per.chunk = c("measure.memory", "chunks.as.arrayjobs", "pm.backend", "foreach.backend")
)

# Load hook: enables the debug mode if package debugme is installed and "batchtools"
# is listed in the comma-separated environment variable DEBUGME, then imports
# "...length" and "hasName" (the latter with force = TRUE) via backports.
.onLoad = function(libname, pkgname) { # nocov start
  if (requireNamespace("debugme", quietly = TRUE) && "batchtools" %in% strsplit(Sys.getenv("DEBUGME"), ",", fixed = TRUE)[[1L]]) {
    debugme::debugme()
    batchtools$debug = TRUE
  }
  backports::import(pkgname, "...length")
  backports::import(pkgname, "hasName", force = TRUE)
} # nocov end

# Unload hook: unloads the dynamic library of the package.
.onUnload = function (libpath) { # nocov start
  library.dynam.unload("batchtools", libpath)
} # nocov end


================================================
FILE: README.Rmd
================================================
---
output: github_document
---

# batchtools

Package website: [release](https://batchtools.mlr-org.com/) | [dev](https://batchtools.mlr-org.com/dev/)

[![JOSS Publication](https://joss.theoj.org/papers/10.21105/joss.00135/status.svg)](https://doi.org/10.21105/joss.00135)
[![r-cmd-check](https://github.com/mlr-org/batchtools/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/batchtools/actions/workflows/r-cmd-check.yml)
[![CRAN
Status](https://www.r-pkg.org/badges/version-ago/batchtools)](https://cran.r-project.org/package=batchtools) [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) As a successor of the packages [BatchJobs](https://github.com/tudo-r/BatchJobs) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments), batchtools provides a parallel implementation of Map for high performance computing systems managed by schedulers like Slurm, Sun Grid Engine, OpenLava, TORQUE/OpenPBS, Load Sharing Facility (LSF) or Docker Swarm (see the setup section in the [vignette](https://batchtools.mlr-org.com/articles/batchtools.html)). Main features: * Convenience: All relevant batch system operations (submitting, listing, killing) are either handled internally or abstracted via simple R functions * Portability: With a well-defined interface, the source is independent from the underlying batch system - prototype locally, deploy on any high performance cluster * Reproducibility: Every computational part has an associated seed stored in a data base which ensures reproducibility even when the underlying batch system changes * Abstraction: The code layers for algorithms, experiment definitions and execution are cleanly separated and allow to write readable and maintainable code to manage large scale computer experiments ## Installation Install the stable version from CRAN: ```{R, eval = FALSE} install.packages("batchtools") ``` For the development version, use [devtools](https://cran.r-project.org/package=devtools): ```{R, eval = FALSE} devtools::install_github("mlr-org/batchtools") ``` Next, you need to setup `batchtools` for your HPC (it will run sequentially otherwise). See the [vignette](https://batchtools.mlr-org.com/articles/batchtools.html) for instructions. ## Why batchtools? 
The development of [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) is discontinued for the following reasons: * Maintainability: The packages [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) are tightly connected which makes maintenance difficult. Changes have to be synchronized and tested against the current CRAN versions for compatibility. Furthermore, BatchExperiments violates CRAN policies by calling internal functions of BatchJobs. * Data base issues: Although we invested weeks to mitigate issues with locks of the SQLite data base or file system (staged queries, file system timeouts, ...), `BatchJobs` kept working unreliably on some systems with high latency under certain conditions. This made `BatchJobs` unusable for many users. [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) will remain on CRAN, but new features are unlikely to be ported back. The [vignette](https://batchtools.mlr-org.com/articles/batchtools.html) contains a section comparing the packages. ## Resources * [Function reference](https://batchtools.mlr-org.com/reference/) * [Vignette](https://batchtools.mlr-org.com/articles/batchtools.html) * [JOSS Paper](https://doi.org/10.21105/joss.00135): Short paper on batchtools. Please cite this if you use batchtools. * [Paper on BatchJobs/BatchExperiments](https://www.jstatsoft.org/v64/i11): The described concept still holds for batchtools and most examples work analogously (see the [vignette](https://batchtools.mlr-org.com/articles/batchtools.html) for differences between the packages).
## Citation Please cite the [JOSS paper](https://doi.org/10.21105/joss.00135) using the following BibTeX entry: ``` @article{, doi = {10.21105/joss.00135}, url = {https://doi.org/10.21105/joss.00135}, year = {2017}, month = {feb}, publisher = {The Open Journal}, volume = {2}, number = {10}, author = {Michel Lang and Bernd Bischl and Dirk Surmann}, title = {batchtools: Tools for R to work on batch systems}, journal = {The Journal of Open Source Software} } ``` ## Related Software * The [High Performance Computing Task View](https://cran.r-project.org/view=HighPerformanceComputing) lists the most relevant packages for scientific computing with R. * [clustermq](https://cran.r-project.org/package=clustermq) is a similar approach which also supports multiple schedulers. Uses the ZeroMQ network protocol for communication, and shines if you have millions of fast jobs. * [batch](https://cran.r-project.org/package=batch) assists in splitting and submitting jobs to LSF and MOSIX clusters. * [flowr](https://cran.r-project.org/package=flowr) supports LSF, Slurm, TORQUE and Moab and provides a scatter-gather approach to define computational jobs. * [future.batchtools](https://cran.r-project.org/package=future.batchtools) implements `batchtools` as backend for [future](https://cran.r-project.org/package=future). * [doFuture](https://cran.r-project.org/package=doFuture) together with [future.batchtools](https://cran.r-project.org/package=future.batchtools) connects `batchtools` to [foreach](https://cran.r-project.org/package=foreach). * [drake](https://cran.r-project.org/package=drake) uses graphs to define computational jobs. `batchtools` is used as a backend via [future.batchtools](https://cran.r-project.org/package=future.batchtools). ## Contributing to batchtools This R package is licensed under the [LGPL-3](https://www.gnu.org/licenses/lgpl-3.0.en.html).
If you encounter problems using this software (lack of documentation, misleading or wrong documentation, unexpected behaviour, bugs, ...) or just want to suggest features, please open an issue in the [issue tracker](https://github.com/mlr-org/batchtools/issues). Pull requests are welcome and will be included at the discretion of the author. If you have customized a template file for your (larger) computing site, please share it: fork the repository, place your template in `inst/templates` and send a pull request. ================================================ FILE: README.md ================================================ # batchtools Package website: [release](https://batchtools.mlr-org.com/) \| [dev](https://batchtools.mlr-org.com/dev/) [![JOSS Publication](https://joss.theoj.org/papers/10.21105/joss.00135/status.svg)](https://doi.org/10.21105/joss.00135) [![r-cmd-check](https://github.com/mlr-org/batchtools/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/batchtools/actions/workflows/r-cmd-check.yml) [![CRAN Status](https://www.r-pkg.org/badges/version-ago/batchtools)](https://cran.r-project.org/package=batchtools) [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) As a successor of the packages [BatchJobs](https://github.com/tudo-r/BatchJobs) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments), batchtools provides a parallel implementation of Map for high performance computing systems managed by schedulers like Slurm, Sun Grid Engine, OpenLava, TORQUE/OpenPBS, Load Sharing Facility (LSF) or Docker Swarm (see the setup section in the [vignette](https://batchtools.mlr-org.com/articles/batchtools.html)). 
Main features: - Convenience: All relevant batch system operations (submitting, listing, killing) are either handled internally or abstracted via simple R functions - Portability: With a well-defined interface, the source is independent from the underlying batch system - prototype locally, deploy on any high performance cluster - Reproducibility: Every computational part has an associated seed stored in a data base which ensures reproducibility even when the underlying batch system changes - Abstraction: The code layers for algorithms, experiment definitions and execution are cleanly separated and allow to write readable and maintainable code to manage large scale computer experiments ## Installation Install the stable version from CRAN: ``` r install.packages("batchtools") ``` For the development version, use [devtools](https://cran.r-project.org/package=devtools): ``` r devtools::install_github("mlr-org/batchtools") ``` Next, you need to setup `batchtools` for your HPC (it will run sequentially otherwise). See the [vignette](https://batchtools.mlr-org.com/articles/batchtools.html) for instructions. ## Why batchtools? The development of [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) is discontinued for the following reasons: - Maintainability: The packages [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) are tightly connected which makes maintenance difficult. Changes have to be synchronized and tested against the current CRAN versions for compatibility. Furthermore, BatchExperiments violates CRAN policies by calling internal functions of BatchJobs. - Data base issues: Although we invested weeks to mitigate issues with locks of the SQLite data base or file system (staged queries, file system timeouts, …), `BatchJobs` kept working unreliably on some systems with high latency under certain conditions.
This made `BatchJobs` unusable for many users. [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) will remain on CRAN, but new features are unlikely to be ported back. The [vignette](https://batchtools.mlr-org.com/articles/batchtools.html) contains a section comparing the packages. ## Resources - [Function reference](https://batchtools.mlr-org.com/reference/) - [Vignette](https://batchtools.mlr-org.com/articles/batchtools.html) - [JOSS Paper](https://doi.org/10.21105/joss.00135): Short paper on batchtools. Please cite this if you use batchtools. - [Paper on BatchJobs/BatchExperiments](https://www.jstatsoft.org/v64/i11): The described concept still holds for batchtools and most examples work analogously (see the [vignette](https://batchtools.mlr-org.com/articles/batchtools.html) for differences between the packages). ## Citation Please cite the [JOSS paper](https://doi.org/10.21105/joss.00135) using the following BibTeX entry: @article{, doi = {10.21105/joss.00135}, url = {https://doi.org/10.21105/joss.00135}, year = {2017}, month = {feb}, publisher = {The Open Journal}, volume = {2}, number = {10}, author = {Michel Lang and Bernd Bischl and Dirk Surmann}, title = {batchtools: Tools for R to work on batch systems}, journal = {The Journal of Open Source Software} } ## Related Software - The [High Performance Computing Task View](https://cran.r-project.org/view=HighPerformanceComputing) lists the most relevant packages for scientific computing with R. - [clustermq](https://cran.r-project.org/package=clustermq) is a similar approach which also supports multiple schedulers. Uses the ZeroMQ network protocol for communication, and shines if you have millions of fast jobs. - [batch](https://cran.r-project.org/package=batch) assists in splitting and submitting jobs to LSF and MOSIX clusters. 
- [flowr](https://cran.r-project.org/package=flowr) supports LSF, Slurm, TORQUE and Moab and provides a scatter-gather approach to define computational jobs. - [future.batchtools](https://cran.r-project.org/package=future.batchtools) implements `batchtools` as backend for [future](https://cran.r-project.org/package=future). - [doFuture](https://cran.r-project.org/package=doFuture) together with [future.batchtools](https://cran.r-project.org/package=future.batchtools) connects `batchtools` to [foreach](https://cran.r-project.org/package=foreach). - [drake](https://cran.r-project.org/package=drake) uses graphs to define computational jobs. `batchtools` is used as a backend via [future.batchtools](https://cran.r-project.org/package=future.batchtools). ## Contributing to batchtools This R package is licensed under the [LGPL-3](https://www.gnu.org/licenses/lgpl-3.0.en.html). If you encounter problems using this software (lack of documentation, misleading or wrong documentation, unexpected behaviour, bugs, …) or just want to suggest features, please open an issue in the [issue tracker](https://github.com/mlr-org/batchtools/issues). Pull requests are welcome and will be included at the discretion of the author. If you have customized a template file for your (larger) computing site, please share it: fork the repository, place your template in `inst/templates` and send a pull request.
================================================ FILE: _pkgdown.yml ================================================ url: https://batchtools.mlr-org.com template: bootstrap: 5 light-switch: true math-rendering: mathjax package: mlr3pkgdowntemplate development: mode: auto version_label: default version_tooltip: "Version" toc: depth: 3 navbar: structure: left: [reference, news, articles, book] right: [search, github, mattermost, stackoverflow, rss, lightswitch] components: home: ~ reference: icon: fa fa-file-alt text: Reference href: reference/index.html mattermost: icon: fa fa-comments href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ book: text: mlr3book icon: fa fa-link href: https://mlr3book.mlr-org.com stackoverflow: icon: fab fa-stack-overflow href: https://stackoverflow.com/questions/tagged/mlr3 rss: icon: fa-rss href: https://mlr-org.com/ reference: - title: Overview contents: - batchtools - title: Registry contents: - Registry - ExperimentRegistry - assertRegistry - loadRegistry - saveRegistry - syncRegistry - sweepRegistry - removeRegistry - getDefaultRegistry - title: Define Jobs contents: - batchMap - batchReduce - batchMapResults - Problem - Algorithm - addExperiments - title: Submit Jobs contents: - submitJobs - batchExport - waitForJobs - chunk - lpt - binpack - setJobNames - title: Query Job Information contents: - getStatus - findJobs - getJobPars - summarizeExperiments - title: Retrieve Results contents: - reduceResults - reduceResultsDataTable - loadResult - unwrap - title: Debugging contents: - resetJobs - testJob - getLog - getErrorMessages - grepLogs - title: Remove Jobs contents: - killJobs - clearRegistry - removeExperiments - title: Additional objects contents: - Job - Experiment - JobCollection - title: Cluster Functions contents: - cfKillJob - cfBrewTemplate - cfReadBrewTemplate - cfHandleUnknownSubmitError - ClusterFunctions - starts_with("makeCluster") - SubmitJobResult - Hook - Worker - title: Miscellaneous contents: - Tags - 
btlapply - JoinTables - runOSCommand - execJob - doJobCollection - estimateRuntimes ================================================ FILE: docs/404.html ================================================ Page not found (404) • batchtools
Content not found. Please use links in the navbar.

Site built with pkgdown 1.6.1.

================================================ FILE: docs/CNAME ================================================ batchtools.mlr-org.com ================================================ FILE: docs/LICENSE-text.html ================================================ License • batchtools
                   GNU LESSER GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.


  This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.

  0. Additional Definitions.

  As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.

  "The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.

  An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.

  A "Combined Work" is a work produced by combining or linking an
Application with the Library.  The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".

  The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.

  The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.

  1. Exception to Section 3 of the GNU GPL.

  You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.

  2. Conveying Modified Versions.

  If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:

   a) under this License, provided that you make a good faith effort to
   ensure that, in the event an Application does not supply the
   function or data, the facility still operates, and performs
   whatever part of its purpose remains meaningful, or

   b) under the GNU GPL, with none of the additional permissions of
   this License applicable to that copy.

  3. Object Code Incorporating Material from Library Header Files.

  The object code form of an Application may incorporate material from
a header file that is part of the Library.  You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:

   a) Give prominent notice with each copy of the object code that the
   Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the object code with a copy of the GNU GPL and this license
   document.

  4. Combined Works.

  You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:

   a) Give prominent notice with each copy of the Combined Work that
   the Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the Combined Work with a copy of the GNU GPL and this license
   document.

   c) For a Combined Work that displays copyright notices during
   execution, include the copyright notice for the Library among
   these notices, as well as a reference directing the user to the
   copies of the GNU GPL and this license document.

   d) Do one of the following:

       0) Convey the Minimal Corresponding Source under the terms of this
       License, and the Corresponding Application Code in a form
       suitable for, and under terms that permit, the user to
       recombine or relink the Application with a modified version of
       the Linked Version to produce a modified Combined Work, in the
       manner specified by section 6 of the GNU GPL for conveying
       Corresponding Source.

       1) Use a suitable shared library mechanism for linking with the
       Library.  A suitable mechanism is one that (a) uses at run time
       a copy of the Library already present on the user's computer
       system, and (b) will operate properly with a modified version
       of the Library that is interface-compatible with the Linked
       Version.

   e) Provide Installation Information, but only if you would otherwise
   be required to provide such information under section 6 of the
   GNU GPL, and only to the extent that such information is
   necessary to install and execute a modified version of the
   Combined Work produced by recombining or relinking the
   Application with a modified version of the Linked Version. (If
   you use option 4d0, the Installation Information must accompany
   the Minimal Corresponding Source and Corresponding Application
   Code. If you use option 4d1, you must provide the Installation
   Information in the manner specified by section 6 of the GNU GPL
   for conveying Corresponding Source.)

  5. Combined Libraries.

  You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:

   a) Accompany the combined library with a copy of the same work based
   on the Library, uncombined with any other library facilities,
   conveyed under the terms of this License.

   b) Give prominent notice with the combined library that part of it
   is a work based on the Library, and explaining where to find the
   accompanying uncombined form of the same work.

  6. Revised Versions of the GNU Lesser General Public License.

  The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.

  Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.

  If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

Site built with pkgdown 1.6.1.

================================================ FILE: docs/articles/batchtools.html ================================================ batchtools • batchtools

Setup

Cluster Functions

The communication with the batch system is managed via so-called cluster functions. They are created with the constructor makeClusterFunctions which defines how jobs are submitted on your system. Furthermore, you may provide functions to list queued/running jobs and to kill jobs.

Usually you do not have to start from scratch but can just use one of the cluster functions which ship with the package:

To use the package with the socket cluster functions, you would call the respective constructor makeClusterFunctionsSocket():

reg = makeRegistry(NA)
reg$cluster.functions = makeClusterFunctionsSocket(2)

To make this selection permanent for this registry, save the Registry with saveRegistry(). To make your cluster function selection permanent for a specific system across R sessions for all new Registries, you can set up a configuration file (see below).

If you have trouble debugging your cluster functions, you can enable the debug mode for extra output. To do so, install the debugme package and set the environment variable DEBUGME to batchtools before you load the batchtools package:

Sys.setenv(DEBUGME = "batchtools")
library(batchtools)

Template Files

Many cluster functions require a template file as argument. These templates are used to communicate with the scheduler and contain placeholders to evaluate arbitrary R expressions. Internally, the brew package is used for this purpose. Some exemplary template files can be found here. It would be great if you would help expand this collection to cover more exotic configurations. To do so, please send your template via mail or open a new pull request.

Note that all variables defined in a JobCollection can be used inside the template. If you need to pass extra variables, you can set them via the argument resources of submitJobs().

If the flexibility which comes with templating is not sufficient, you can still construct a custom cluster function implementation yourself using the provided constructor.

Configuration File

The configuration file can be used to set system specific options. Its default location depends on the operating system (see Registry), but for the first time setup you can put one in the current working directory (as reported by getwd()). In order to set the cluster function implementation, you would generate a file with the following content:

cluster.functions = makeClusterFunctionsInteractive()

The configuration file is parsed whenever you create or load a Registry. It is sourced inside of your registry which has the advantage that you can (a) access all of the parameters which are passed to makeRegistry and (b) you can also directly change them. Let's say you always want your working directory in your home directory and you always want to load the checkmate package on the nodes, you can just append these lines:

work.dir = "~"
packages = union(packages, "checkmate")

See the documentation on Registry for a more complete list of supported configuration options.

Migration from BatchJobs/BatchExperiments

The development of BatchJobs and BatchExperiments is discontinued because of the following reasons:

  • Maintainability: The packages BatchJobs and BatchExperiments are tightly connected which makes maintaining difficult. Changes have to be synchronized and tested against the current CRAN versions for compatibility. Furthermore, BatchExperiments violates CRAN policies by calling internal functions of BatchJobs.
  • Data base issues: Although we invested weeks to mitigate issues with locks of the SQLite data base or file system (staged queries, file system timeouts, …), BatchJobs kept working unreliably on some systems with high latency or specific file systems. This made BatchJobs unusable for many users.

BatchJobs and BatchExperiments will remain on CRAN, but new features are unlikely to be ported back.

Internal Changes

  • batchtools does not use SQLite anymore. Instead, all the information is stored directly in the registry using data.tables acting as an in-memory database. As a side effect, many operations are much faster.
  • Nodes do not have to access the registry. submitJobs() stores a temporary object of type JobCollection on the file system which holds all the information necessary to execute a chunk of jobs via doJobCollection() on the node. This avoids file system locks because each job accesses only one file exclusively.
  • ClusterFunctionsMulticore now uses the parallel package for multicore execution.
  • ClusterFunctionsSSH can still be used to emulate a scheduler-like system which respects the work load on the local machine. Setting the hostname to "localhost" just strips out ssh of the command issued.

Interface Changes

  • batchtools remembers the last created or loaded Registry and sets it as default registry. This way, you do not need to pass the registry around anymore. If you need to work with multiple registries simultaneously on the other hand, you can still do so by explicitly passing registries to the functions.
  • Most functions now return a data.table which is keyed with the job.id. This way, return values can be joined together easily and efficiently (see this help page for some examples).
  • The building blocks of a problem have been renamed from static and dynamic to the more intuitive data and fun. Thus, algorithm functions should have the formal arguments job, data and instance.
  • The function makeDesign has been removed. Parameters can be defined by just passing a data.frame or data.table to addExperiments. For exhaustive designs, use data.table::CJ().

Template changes

  • The scheduler should directly execute the command:
Rscript -e 'batchtools::doJobCollection(<filename>)'

There is no intermediate R source file like there was in BatchJobs.
  • All information stored in the object JobCollection can be accessed while brewing the template.
  • Extra variables may be passed via the argument resources of submitJobs.

New features

  • Support for Docker Swarm via ClusterFunctionsDocker.
  • Jobs can now be tagged and untagged to provide an easy way to group them.
  • Some resources like the number of CPUs are now optionally passed to parallelMap. This eases nested parallelization, e.g. to use multicore parallelization on the slave by just setting a resource on the master. See submitJobs() for an example.
  • ClusterFunctions are now more flexible in general as they can define hook functions which will be called at certain events. ClusterFunctionsDocker is an example use case which implements a housekeeping routine. This routine is called every time before a job is about to get submitted to the scheduler (in this case: the Docker Swarm) via the hook pre.submit and every time directly after the registry synchronized jobs stored on the file system via the hook post.sync.
  • More new features are covered in the NEWS.

Porting to batchtools

The following table assists in porting to batchtools by mapping BatchJobs/BatchExperiments functions to their counterparts in batchtools. The table does not cover functions which are (a) used only internally in BatchJobs and (b) functions which have not been renamed.

BatchJobs batchtools
addRegistryPackages Set reg$packages or reg$namespaces, call saveRegistry()
addRegistrySourceDirs -
addRegistrySourceFiles Set reg$source, call saveRegistry()
batchExpandGrid batchMap: batchMap(..., args = CJ(x = 1:3, y = 1:10))
batchMapQuick btmapply
batchReduceResults -
batchUnexport batchExport
filterResults -
getJobIds findJobs
getJobInfo getJobStatus
getJob makeJob
getJobParamDf getJobPars
loadResults reduceResultsList
reduceResultsDataFrame reduceResultsDataTable
reduceResultsMatrix reduceResultsList + do.call(rbind, res)
reduceResultsVector reduceResultsDataTable
setJobFunction -
setJobNames -
showStatus getStatus

Example 1: Approximation of \(\pi\)

To get a first insight into the usage of batchtools, we start with an exemplary Monte Carlo simulation to approximate \(\pi\). For background information, see Wikipedia.

First, a so-called registry object has to be created, which defines a directory where all relevant information, files and results of the computational jobs will be stored. There are two different types of registry objects: First, a regular Registry which we will use in this example. Second, an ExperimentRegistry which provides an alternative way to define computational jobs and thereby is tailored for a broad range of large scale computer experiments. Here, we use a temporary registry which is stored in the temp directory of the system and gets automatically deleted if you close the R session.

reg = makeRegistry(file.dir = NA, seed = 1)

For a permanent registry, set the file.dir to a valid path. It can then be reused later, e.g., when you login to the system again, by calling the function loadRegistry(file.dir).

When a registry object is created or loaded, it is stored for the active R session as the default. Therefore the argument reg will be omitted in function calls of this example, assuming the correct registry is set as default. To get the current default registry, getDefaultRegistry can be used. To switch to another registry, use setDefaultRegistry().

First, we create a function which samples \(n\) points \((x_i, y_i)\) whereas \(x_i\) and \(y_i\) are distributed uniformly, i.e. \(x_i, y_i \sim \mathcal{U}(0,1)\). Next, the distance to the origin \((0, 0)\) is calculated and the fraction of points in the unit circle (\(d \leq 1\)) is returned.

piApprox = function(n) {
  # Draw 2 * n uniform numbers and arrange them as n points, one coordinate per column.
  pts = matrix(runif(2 * n), ncol = 2)
  x = pts[, 1]
  y = pts[, 2]
  # Flag the points whose Euclidean distance to the origin is at most 1.
  inside = sqrt(x^2 + y^2) <= 1
  # Four times the fraction of flagged points is the Monte Carlo estimate of pi.
  4 * mean(inside)
}
set.seed(42)
piApprox(1000)
## [1] 3.156

We now parallelize piApprox() with batchtools: We create 10 jobs, each doing a MC simulation with \(10^5\) points. We use batchMap() to define the jobs (note that this does not yet start the calculation):

batchMap(fun = piApprox, n = rep(1e5, 10))
## Adding 10 jobs ...

The length of the vector or list defines how many different jobs are created, while the elements themselves are used as arguments for the function. The function batchMap(fun, ...) works analogously to Map(f, ...) of the base package. An overview over the jobs and their IDs can be retrieved with getJobTable() which returns a data.frame with all relevant information:

##  [1] "job.id"       "submitted"    "started"      "done"         "error"       
##  [6] "mem.used"     "batch.id"     "log.file"     "job.hash"     "job.name"    
## [11] "time.queued"  "time.running" "job.pars"     "resources"    "tags"

Note that a unique job ID is assigned to each job. These IDs can be used to restrict operations to subsets of jobs. To actually start the calculation, call submitJobs(). The registry and the selected job IDs can be taken as arguments as well as an arbitrary list of resource requirements, which are to be handled by the cluster back end.

submitJobs(resources = list(walltime = 3600, memory = 1024))
## Submitting 10 jobs in 10 chunks using cluster functions 'Interactive' ...

In this example, a cap for the execution time (so-called walltime) and for the maximum memory requirements are set. The progress of the submitted jobs can be checked with getStatus().

## Status for 10 jobs at 2020-10-21 09:39:36:
##   Submitted    : 10 (100.0%)
##   -- Queued    :  0 (  0.0%)
##   -- Started   : 10 (100.0%)
##   ---- Running :  0 (  0.0%)
##   ---- Done    : 10 (100.0%)
##   ---- Error   :  0 (  0.0%)
##   ---- Expired :  0 (  0.0%)

The resulting output includes the number of jobs in the registry, how many have been submitted, have started to execute on the batch system, are currently running, have successfully completed, and have terminated due to an R exception. After jobs have successfully terminated, we can load their results on the master. This can be done in a simple fashion by using either loadResult(), which returns a single result exactly in the form it was calculated during mapping, or by using reduceResults(), which is a version of Reduce() from the base package for registry objects.

## [1] TRUE
mean(sapply(1:10, loadResult))
## [1] 3.140652
reduceResults(function(x, y) x + y) / 10
## [1] 3.140652

If you are absolutely sure that your function works, you can take a shortcut and use batchtools in an lapply fashion using btlapply(). This function creates a temporary registry (but you may also pass one yourself), calls batchMap(), waits for the jobs to terminate with waitForJobs() and then uses reduceResultsList() to return the results.

res = btlapply(rep(1e5, 10), piApprox)
mean(unlist(res))
## [1] 3.14124

Example 2: Machine Learning

We stick to a rather simple, but not unrealistic example to explain some further functionalities: Applying two classification learners to the famous iris data set (Anderson 1935), vary a few hyperparameters and evaluate the effect on the classification performance.

First, we create a registry, the central meta-data object which records technical details and the setup of the experiments. We use an ExperimentRegistry where the job definition is split into creating problems and algorithms. See the paper on BatchJobs and BatchExperiments for a detailed explanation. Again, we use a temporary registry and make it the default registry.

library(batchtools)
reg = makeExperimentRegistry(file.dir = NA, seed = 1)

Problems and Algorithms

By adding a problem to the registry, we can define the data on which certain computational jobs shall work. This can be a matrix, data frame or array that always stays the same for all subsequent experiments. But it can also be of a more dynamic nature, e.g., subsamples of a dataset or random numbers drawn from a probability distribution. Therefore the function addProblem() accepts static parts in its data argument, which is passed to the argument fun which generates a (possibly stochastic) problem instance. For data, any R object can be used. If only data is given, the generated instance is data. The argument fun has to be a function with the arguments data and job (and optionally other arbitrary parameters). The argument job is an object of type Job which holds additional information about the job.

We want to split the iris data set into a training set and test set. In this example we use subsampling which just randomly takes a fraction of the observations as training set. We define a problem function which returns the indices of the respective training and test set for a split with 100 * ratio% of the observations being in the training set:

subsample = function(data, job, ratio, ...) {
  # Number of observations available for splitting.
  n.obs = nrow(data)
  # The training set receives floor(n.obs * ratio) randomly drawn row indices.
  n.train = floor(n.obs * ratio)
  train.idx = sample(n.obs, n.train)
  # Every row index that was not drawn for training belongs to the test set.
  test.idx = setdiff(seq_len(n.obs), train.idx)
  list(test = test.idx, train = train.idx)
}

addProblem() files the problem to the file system and the problem gets recorded in the registry.

data("iris", package = "datasets")
addProblem(name = "iris", data = iris, fun = subsample, seed = 42)
## Adding problem 'iris'

The function call will be evaluated at a later stage on the workers. In this process, the data part will be loaded and passed to the function. Note that we set a problem seed to synchronize the experiments in the sense that the same resampled training and test sets are used for the algorithm comparison in each distinct replication.

The algorithms for the jobs are added to the registry in a similar manner. When using addAlgorithm(), an identifier as well as the algorithm to apply are required arguments. The algorithm must be given as a function with arguments job, data and instance. Further arbitrary arguments (e.g., hyperparameters or strategy parameters) may be defined analogously as for the function in addProblem. The objects passed to the function via job and data are here the same as above, while via instance the return value of the evaluated problem function is passed. The algorithm can return any R object which will automatically be stored on the file system for later retrieval. Firstly, we create an algorithm which applies a support vector machine:

svm.wrapper = function(data, job, instance, ...) {
  library("e1071")
  # Split the data using the row indices provided by the problem instance.
  train.set = data[instance$train, ]
  test.set = data[instance$test, ]
  # All extra design parameters are forwarded to svm() unchanged.
  fit = svm(Species ~ ., data = train.set, ...)
  # The local must stay named `pred`: table() uses the symbol as dimnames label.
  pred = predict(fit, newdata = test.set, type = "class")
  # Confusion matrix: true species in rows, predicted species in columns.
  table(data$Species[instance$test], pred)
}
addAlgorithm(name = "svm", fun = svm.wrapper)
## Adding algorithm 'svm'

Secondly, a random forest of classification trees:

forest.wrapper = function(data, job, instance, ...) {
  # Fit a random forest on the training rows and return the confusion matrix
  # (true species in rows, predicted species in columns) for the test rows.
  #
  # data:     data frame holding the column Species and the features.
  # job:      job object supplied by batchtools (not used here).
  # instance: list with the row indices `train` and `test`.
  # ...:      design parameters; `ntree` sets the number of trees.
  library("ranger")
  pars = list(...)
  # The design parameter is called `ntree`, but ranger() expects `num.trees`.
  # Previously `ntree` was swallowed by `...` and had no effect on the model.
  # Fall back to ranger's default of 500 trees if `ntree` is not supplied.
  num.trees = if (is.null(pars[["ntree"]])) 500 else pars[["ntree"]]
  mod = ranger(Species ~ ., data = data[instance$train, ], num.trees = num.trees, write.forest = TRUE)
  pred = predict(mod, data = data[instance$test, ])
  table(data$Species[instance$test], pred$predictions)
}
addAlgorithm(name = "forest", fun = forest.wrapper)
## Adding algorithm 'forest'

Both algorithms return a confusion matrix for the predictions on the test set, which will later be used to calculate the misclassification rate.

Note that using the ... argument in the wrapper definitions allows us to circumvent naming specific design parameters for now. This is an advantage if we later want to extend the set of algorithm parameters in the experiment. The algorithms get recorded in the registry and the corresponding functions are stored on the file system.

Defined problems and algorithms can be queried with:

reg$problems
## [1] "iris"
reg$algorithms
## [1] "svm"    "forest"

The flow to define experiments is summarized in the following figure:

Creating jobs

addExperiments() is used to parametrize the jobs and thereby define computational jobs. To do so, you have to pass named lists of parameters to addExperiments(). The elements of the respective list (one for problems and one for algorithms) must be named after the problem or algorithm they refer to. The data frames contain parameter constellations for the problem or algorithm function where columns must have the same names as the target arguments. When the problem design and the algorithm design are combined in addExperiments(), each combination of the parameter sets of the two designs defines a distinct job. How often each of these jobs should be computed can be determined with the argument repls.

# problem design: try two values for the ratio parameter
pdes = list(iris = data.table(ratio = c(0.67, 0.9)))

# algorithm design: try combinations of kernel and epsilon exhaustively,
# try different number of trees for the forest
ades = list(
  svm = CJ(kernel = c("linear", "polynomial", "radial"), epsilon = c(0.01, 0.1)),
  forest = data.table(ntree = c(100, 500, 1000))
)

addExperiments(pdes, ades, repls = 5)
## Adding 60 experiments ('iris'[2] x 'svm'[6] x repls[5]) ...
## Adding 30 experiments ('iris'[2] x 'forest'[3] x repls[5]) ...

The jobs are now available in the registry with an individual job ID for each. The function summarizeExperiments() returns a table which gives a quick overview over all defined experiments.

##    problem algorithm .count
##     <char>    <char>  <int>
## 1:    iris       svm     60
## 2:    iris    forest     30
summarizeExperiments(by = c("problem", "algorithm", "ratio"))
##    problem algorithm ratio .count
##     <char>    <char> <num>  <int>
## 1:    iris       svm  0.67     30
## 2:    iris       svm  0.90     30
## 3:    iris    forest  0.67     15
## 4:    iris    forest  0.90     15

Before Submitting

Before submitting all jobs to the batch system, we encourage you to test each algorithm individually. Or sometimes you want to submit only a subset of experiments because the jobs vastly differ in runtime. Another reoccurring task is the collection of results for only a subset of experiments. For all these use cases, findExperiments() can be employed to conveniently select a particular subset of jobs. It returns the IDs of all experiments that match the given criteria. Your selection can depend on substring matches of problem or algorithm IDs using prob.name or algo.name, respectively. You can also pass R expressions, which will be evaluated in your problem parameter setting (prob.pars) or algorithm parameter setting (algo.pars). The expression is then expected to evaluate to a Boolean value. Furthermore, you can restrict the experiments to specific replication numbers.

To illustrate findExperiments(), we will select two experiments, one with a support vector machine and the other with a random forest and the parameter ntree = 1000. The selected experiment IDs are then passed to testJob.

id1 = head(findExperiments(algo.name = "svm"), 1)
print(id1)
##    job.id
##     <int>
## 1:      1
id2 = head(findExperiments(algo.name = "forest", algo.pars = (ntree == 1000)), 1)
print(id2)
##    job.id
##     <int>
## 1:     71
testJob(id = id1)
## ### [bt]: Generating problem instance for problem 'iris' ...
## ### [bt]: Applying algorithm 'svm' on problem 'iris' for job 1 (seed = 2) ...
##             pred
##              setosa versicolor virginica
##   setosa         13          0         0
##   versicolor      0         17         0
##   virginica       0          1        19
testJob(id = id2)
## ### [bt]: Generating problem instance for problem 'iris' ...
## ### [bt]: Applying algorithm 'forest' on problem 'iris' for job 71 (seed = 72) ...
##             
##              setosa versicolor virginica
##   setosa         13          0         0
##   versicolor      0         16         1
##   virginica       0          1        19

If something goes wrong, batchtools comes with a bunch of useful debugging utilities (see separate vignette on error handling). If everything turns out fine, we can proceed with the calculation.

Submitting and Collecting Results

To submit the jobs, we call submitJobs() and wait for all jobs to terminate using waitForJobs().

## Submitting 90 jobs in 90 chunks using cluster functions 'Interactive' ...
## [1] TRUE

After jobs are finished, the results can be collected with reduceResultsDataTable() where we directly extract the mean misclassification error:

reduce = function(res) list(mce = (sum(res) - sum(diag(res))) / sum(res))
results = unwrap(reduceResultsDataTable(fun = reduce))
head(results)
##    job.id   mce
##     <int> <num>
## 1:      1  0.02
## 2:      2  0.00
## 3:      3  0.04
## 4:      4  0.06
## 5:      5  0.02
## 6:      6  0.02

Next, we merge the results table with the table of job parameters using one of the join helpers provided by batchtools (here, we use an inner join):

pars = unwrap(getJobPars())
tab = ijoin(pars, results)
head(tab)
##    job.id problem algorithm ratio kernel epsilon ntree   mce
##     <int>  <char>    <char> <num> <char>   <num> <num> <num>
## 1:      1    iris       svm  0.67 linear    0.01    NA  0.02
## 2:      2    iris       svm  0.67 linear    0.01    NA  0.00
## 3:      3    iris       svm  0.67 linear    0.01    NA  0.04
## 4:      4    iris       svm  0.67 linear    0.01    NA  0.06
## 5:      5    iris       svm  0.67 linear    0.01    NA  0.02
## 6:      6    iris       svm  0.67 linear    0.10    NA  0.02

We now aggregate the results group-wise. You can use data.table, base::aggregate(), or the dplyr package for this purpose. Here, we use data.table to subset the table to jobs where the ratio is 0.67 and group by algorithm and the algorithm hyperparameters:

tab[ratio == 0.67, list(mmce = mean(mce)),
  by = c("algorithm", "kernel", "epsilon", "ntree")]
##    algorithm     kernel epsilon ntree  mmce
##       <char>     <char>   <num> <num> <num>
## 1:       svm     linear    0.01    NA 0.028
## 2:       svm     linear    0.10    NA 0.028
## 3:       svm polynomial    0.01    NA 0.096
## 4:       svm polynomial    0.10    NA 0.096
## 5:       svm     radial    0.01    NA 0.044
## 6:       svm     radial    0.10    NA 0.044
## 7:    forest       <NA>      NA   100 0.044
## 8:    forest       <NA>      NA   500 0.048
## 9:    forest       <NA>      NA  1000 0.044

Example: Error Handling

In any large scale experiment many things can and will go wrong. The cluster might have an outage, jobs may run into resource limits or crash, subtle bugs in your code could be triggered or any other error condition might arise. In these situations it is important to quickly determine what went wrong and to recompute only the minimal number of required jobs.

Therefore, before you submit anything you should use testJob() to catch errors that are easy to spot because they are raised in many or all jobs. If external is set, this function runs the job without side effects in an independent R process on your local machine via Rscript, similar to how it would run on the slave, redirects the output of the process to your R console, loads the job result and returns it. If you do not set external, the job is executed in the currently running R session, with the drawback that you might be unable to catch missing variable declarations or missing package dependencies.

By way of illustration here is a small example. First, we create a temporary registry.

library(batchtools)
reg = makeRegistry(file.dir = NA, seed = 1)

Ten jobs are created, one will throw a warning and two of them will raise an exception.

flakeyFunction <- function(value) {
  if (value == 5) warning("Just a simple warning")
  if (value %in% c(2, 9)) stop("Ooops.")
  value^2
}
batchMap(flakeyFunction, 1:10)
## Adding 10 jobs ...

Now that the jobs are defined, we can test jobs independently:

testJob(id = 1)
## ### [bt]: Setting seed to 2 ...
## [1] 1

In this case, testing the job with ID = 1 provides the appropriate result but testing the job with ID = 2 leads to an error:

## ### [bt]: Setting seed to 3 ...
## Error in (function (value)  : Ooops.
## [1] "Error in (function (value)  : Ooops.\n"

We ignore the error here, and just assume everything looks fine and submit all jobs.

## Submitting 10 jobs in 10 chunks using cluster functions 'Interactive' ...
## Error in (function (value)  : Ooops.
## Warning in (function (value) : Just a simple warning
## Error in (function (value)  : Ooops.
## [1] FALSE

After you have submitted jobs and suspect that something is going wrong, the first thing to do is to run getStatus() to display a summary of the current state of the system.

## Status for 10 jobs at 2020-10-21 09:39:40:
##   Submitted    : 10 (100.0%)
##   -- Queued    :  0 (  0.0%)
##   -- Started   : 10 (100.0%)
##   ---- Running :  0 (  0.0%)
##   ---- Done    :  8 ( 80.0%)
##   ---- Error   :  2 ( 20.0%)
##   ---- Expired :  0 (  0.0%)

The status message shows that two of the jobs could not be executed successfully. To get the IDs of all jobs that failed due to an error we can use findErrors() and to retrieve the actual error message, we can use getErrorMessages().

##    job.id
##     <int>
## 1:      2
## 2:      9
##    job.id terminated  error                              message
##     <int>     <lgcl> <lgcl>                               <char>
## 1:      2       TRUE   TRUE Error in (function (value)  : Ooops.
## 2:      9       TRUE   TRUE Error in (function (value)  : Ooops.

If we want to peek into the R log file of a job to see more context for the error we can use showLog() which opens a pager or use getLog() to get the log as character vector:

tail(getLog(id = 9))
## [1] "### [bt]: Memory measurement disabled"                           
## [2] "### [bt]: Starting job [batchtools job.id=9]"                    
## [3] "### [bt]: Setting seed to 10 ..."                                
## [4] ""                                                                
## [5] "### [bt]: Job terminated with an exception [batchtools job.id=9]"
## [6] "### [bt]: Calculation finished!"

You can also grep for messages (output suppressed in this vignette for technical reasons):

grepLogs(pattern = "simple", ignore.case = TRUE)

Workflow

On the Local System

  1. Create a Registry with makeRegistry() (or makeExperimentRegistry()) or load an existing from the file system with loadRegistry().
  2. Define computational jobs with batchMap() or batchReduce() if you used makeRegistry() or define with addAlgorithm(), addProblem() and addExperiments() if you started with makeExperimentRegistry(). It is advised to test some jobs with testJob() in the interactive session and with testJob(external = TRUE) in a separate R process. Note that you can add additional jobs if you are using an ExperimentRegistry.
  3. If required, query the data base for job ids depending on their status, parameters or tags (see findJobs()). The returned tables can easily be combined in a set-like fashion with data base verbs: union (ojoin() for outer join), intersect (ijoin() for inner join), difference (ajoin() for anti join).
  4. Submit jobs with submitJobs(). You can specify job resources here. If you have thousands of fast terminating jobs, you want to chunk() them first. If some jobs already terminated, you can estimate the runtimes with estimateRuntimes() and chunk jobs into heterogeneous groups with lpt() and binpack().
  5. Monitor jobs. getStatus() gives a summarizing overview. Use showLog() and grepLogs() to investigate log files. Run jobs in the currently running session with testJob() to get a traceback().
  6. Collect (partial) results. loadResult() retrieves a single result from the file system. reduceResults() mimics Reduce() and allows to apply a function to many files in an iterative fashion. reduceResultsList() and reduceResultsDataTable() collect results into a list or data.table, respectively.

On Multiple Systems

Most users develop and prototype their experiments on a desktop box in their preferred IDE and later deploy to a large computing cluster. This can be done by prototyping locally (testJob() or submit subsets via submitJobs()). To deploy to the cluster, just copy the file directory (as reported by reg$file.dir) to the remote system. Next, log in on the cluster (typically via ssh), cd to the copied directory and call loadRegistry("<file.dir.on.remote>", "<work.dir.on.remote>", writeable = TRUE). This function will (a) source the local configuration file so that you can talk to the cluster (verify by checking the output of reg$cluster.functions) and (b) adjust the paths to the new system. After loading the Registry, it is advised to test some jobs again with testJob() before submitting all of them with submitJobs(resources = list()) (remember you now need to set resources!). After some jobs are finished, the file.dir can be copied back (do not merge with the previous directory!) and loaded again with loadRegistry().

This approach is totally viable as long as some general rules are followed:

  1. Make sure you have all packages installed. Package versions can be synchronized across machines with checkpoint or packrat.
  2. Test jobs on the remote system prior to submitting to ensure that paths are resolved correctly.
  3. Make sure you have set the cluster functions in a configuration file, and stick to one backend as long as jobs are running.
  4. The status can only be monitored on the remote system (for obvious reasons).
  5. Partial results can be inspected both on the remote system and on the local system. For the latter, you need to copy over the complete file.dir first. Overwriting/merging directories is not advised as this may lead to inconsistencies if you added or removed experiments on the remote. If you have to merge, use rsync with option --delete. Load the registry locally with loadRegistry() and collect results. Do not copy back and forth.
  6. Avoid accessing the file.dir with multiple sessions simultaneously. This includes accessing the registry via a mount! Simultaneous access may lead to inconsistencies and missing results.

Site built with pkgdown 1.6.1.

================================================ FILE: docs/articles/batchtools_files/header-attrs-2.4/header-attrs.js ================================================ // Pandoc 2.9 adds attributes on both header and div. We remove the former (to // be compatible with the behavior of Pandoc < 2.8). document.addEventListener('DOMContentLoaded', function(e) { var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); var i, h, a; for (i = 0; i < hs.length; i++) { h = hs[i]; if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 a = h.attributes; while (a.length > 0) h.removeAttribute(a[0].name); } }); ================================================ FILE: docs/articles/index.html ================================================ Articles • batchtools

All vignettes

batchtools

Site built with pkgdown 1.6.1.

================================================ FILE: docs/authors.html ================================================ Citation and Authors • batchtools

Michel Lang, Bernd Bischl, Dirk Surmann (2017). batchtools: Tools for R to work on batch systems. The Journal of Open Source Software, 2(10). URL https://doi.org/10.21105/joss.00135.

@Article{,
  title = {batchtools: Tools for R to work on batch systems},
  author = {Michel Lang and Bernd Bischl and Dirk Surmann},
  journal = {The Journal of Open Source Software},
  year = {2017},
  month = {feb},
  number = {10},
  doi = {10.21105/joss.00135},
  url = {https://doi.org/10.21105/joss.00135},
}

Bernd Bischl, Michel Lang, Olaf Mersmann, Joerg Rahnenfuehrer, Claus Weihs (2015). BatchJobs and BatchExperiments: Abstraction Mechanisms for Using R in Batch Environments. Journal of Statistical Software, 64(11), 1-25. URL https://www.jstatsoft.org/v64/i11/.

@Article{,
  title = {{BatchJobs} and {BatchExperiments}: Abstraction Mechanisms for Using {R} in Batch Environments},
  author = {Bernd Bischl and Michel Lang and Olaf Mersmann and J{\"o}rg Rahnenf{\"u}hrer and Claus Weihs},
  journal = {Journal of Statistical Software},
  year = {2015},
  volume = {64},
  number = {11},
  pages = {1--25},
  url = {https://www.jstatsoft.org/v64/i11/},
}
  • Michel Lang. Maintainer, author.

  • Bernd Bischl. Author.

  • Dirk Surmann. Contributor.

Site built with pkgdown 1.6.1.

================================================ FILE: docs/bootstrap-toc.css ================================================ /*! * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) * Copyright 2015 Aidan Feldman * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ /* All levels of nav */ nav[data-toggle='toc'] .nav > li > a { display: block; padding: 4px 20px; font-size: 13px; font-weight: 500; color: #767676; } nav[data-toggle='toc'] .nav > li > a:hover, nav[data-toggle='toc'] .nav > li > a:focus { padding-left: 19px; color: #563d7c; text-decoration: none; background-color: transparent; border-left: 1px solid #563d7c; } nav[data-toggle='toc'] .nav > .active > a, nav[data-toggle='toc'] .nav > .active:hover > a, nav[data-toggle='toc'] .nav > .active:focus > a { padding-left: 18px; font-weight: bold; color: #563d7c; background-color: transparent; border-left: 2px solid #563d7c; } /* Nav: second level (shown on .active) */ nav[data-toggle='toc'] .nav .nav { display: none; /* Hide by default, but at >768px, show it */ padding-bottom: 10px; } nav[data-toggle='toc'] .nav .nav > li > a { padding-top: 1px; padding-bottom: 1px; padding-left: 30px; font-size: 12px; font-weight: normal; } nav[data-toggle='toc'] .nav .nav > li > a:hover, nav[data-toggle='toc'] .nav .nav > li > a:focus { padding-left: 29px; } nav[data-toggle='toc'] .nav .nav > .active > a, nav[data-toggle='toc'] .nav .nav > .active:hover > a, nav[data-toggle='toc'] .nav .nav > .active:focus > a { padding-left: 28px; font-weight: 500; } /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ nav[data-toggle='toc'] .nav > .active > ul { display: block; } ================================================ FILE: docs/bootstrap-toc.js 
================================================ /*! * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) * Copyright 2015 Aidan Feldman * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ (function() { 'use strict'; window.Toc = { helpers: { // return all matching elements in the set, or their descendants findOrFilter: function($el, selector) { // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ // http://stackoverflow.com/a/12731439/358804 var $descendants = $el.find(selector); return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); }, generateUniqueIdBase: function(el) { var text = $(el).text(); var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); return anchor || el.tagName.toLowerCase(); }, generateUniqueId: function(el) { var anchorBase = this.generateUniqueIdBase(el); for (var i = 0; ; i++) { var anchor = anchorBase; if (i > 0) { // add suffix anchor += '-' + i; } // check if ID already exists if (!document.getElementById(anchor)) { return anchor; } } }, generateAnchor: function(el) { if (el.id) { return el.id; } else { var anchor = this.generateUniqueId(el); el.id = anchor; return anchor; } }, createNavList: function() { return $(''); }, createChildNavList: function($parent) { var $childList = this.createNavList(); $parent.append($childList); return $childList; }, generateNavEl: function(anchor, text) { var $a = $(''); $a.attr('href', '#' + anchor); $a.text(text); var $li = $('
  • '); $li.append($a); return $li; }, generateNavItem: function(headingEl) { var anchor = this.generateAnchor(headingEl); var $heading = $(headingEl); var text = $heading.data('toc-text') || $heading.text(); return this.generateNavEl(anchor, text); }, // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). getTopLevel: function($scope) { for (var i = 1; i <= 6; i++) { var $headings = this.findOrFilter($scope, 'h' + i); if ($headings.length > 1) { return i; } } return 1; }, // returns the elements for the top level, and the next below it getHeadings: function($scope, topLevel) { var topSelector = 'h' + topLevel; var secondaryLevel = topLevel + 1; var secondarySelector = 'h' + secondaryLevel; return this.findOrFilter($scope, topSelector + ',' + secondarySelector); }, getNavLevel: function(el) { return parseInt(el.tagName.charAt(1), 10); }, populateNav: function($topContext, topLevel, $headings) { var $context = $topContext; var $prevNav; var helpers = this; $headings.each(function(i, el) { var $newNav = helpers.generateNavItem(el); var navLevel = helpers.getNavLevel(el); // determine the proper $context if (navLevel === topLevel) { // use top level $context = $topContext; } else if ($prevNav && $context === $topContext) { // create a new level of the tree and switch to it $context = helpers.createChildNavList($prevNav); } // else use the current $context $context.append($newNav); $prevNav = $newNav; }); }, parseOps: function(arg) { var opts; if (arg.jquery) { opts = { $nav: arg }; } else { opts = arg; } opts.$scope = opts.$scope || $(document.body); return opts; } }, // accepts a jQuery object, or an options object init: function(opts) { opts = this.helpers.parseOps(opts); // ensure that the data attribute is in place for styling opts.$nav.attr('data-toggle', 'toc'); var $topContext = this.helpers.createChildNavList(opts.$nav); var topLevel = this.helpers.getTopLevel(opts.$scope); var $headings = this.helpers.getHeadings(opts.$scope, topLevel); this.helpers.populateNav($topContext, topLevel, $headings); } }; $(function() { $('nav[data-toggle="toc"]').each(function(i, el) { var $nav = $(el); Toc.init($nav); }); }); })(); ================================================ FILE: docs/docsearch.css ================================================ /* Docsearch 
-------------------------------------------------------------- */ /* Source: https://github.com/algolia/docsearch/ License: MIT */ .algolia-autocomplete { display: block; -webkit-box-flex: 1; -ms-flex: 1; flex: 1 } .algolia-autocomplete .ds-dropdown-menu { width: 100%; min-width: none; max-width: none; padding: .75rem 0; background-color: #fff; background-clip: padding-box; border: 1px solid rgba(0, 0, 0, .1); box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); } @media (min-width:768px) { .algolia-autocomplete .ds-dropdown-menu { width: 175% } } .algolia-autocomplete .ds-dropdown-menu::before { display: none } .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { padding: 0; background-color: rgb(255,255,255); border: 0; max-height: 80vh; } .algolia-autocomplete .ds-dropdown-menu .ds-suggestions { margin-top: 0 } .algolia-autocomplete .algolia-docsearch-suggestion { padding: 0; overflow: visible } .algolia-autocomplete .algolia-docsearch-suggestion--category-header { padding: .125rem 1rem; margin-top: 0; font-size: 1.3em; font-weight: 500; color: #00008B; border-bottom: 0 } .algolia-autocomplete .algolia-docsearch-suggestion--wrapper { float: none; padding-top: 0 } .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { float: none; width: auto; padding: 0; text-align: left } .algolia-autocomplete .algolia-docsearch-suggestion--content { float: none; width: auto; padding: 0 } .algolia-autocomplete .algolia-docsearch-suggestion--content::before { display: none } .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { padding-top: .75rem; margin-top: .75rem; border-top: 1px solid rgba(0, 0, 0, .1) } .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { display: block; padding: .1rem 1rem; margin-bottom: 0.1; font-size: 1.0em; font-weight: 400 /* display: none */ } .algolia-autocomplete .algolia-docsearch-suggestion--title { display: block; padding: .25rem 1rem; 
margin-bottom: 0; font-size: 0.9em; font-weight: 400 } .algolia-autocomplete .algolia-docsearch-suggestion--text { padding: 0 1rem .5rem; margin-top: -.25rem; font-size: 0.8em; font-weight: 400; line-height: 1.25 } .algolia-autocomplete .algolia-docsearch-footer { width: 110px; height: 20px; z-index: 3; margin-top: 10.66667px; float: right; font-size: 0; line-height: 0; } .algolia-autocomplete .algolia-docsearch-footer--logo { background-image: url("data:image/svg+xml;utf8,"); background-repeat: no-repeat; background-position: 50%; background-size: 100%; overflow: hidden; text-indent: -9000px; width: 100%; height: 100%; display: block; transform: translate(-8px); } .algolia-autocomplete .algolia-docsearch-suggestion--highlight { color: #FF8C00; background: rgba(232, 189, 54, 0.1) } .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) } .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { background-color: rgba(192, 192, 192, .15) } ================================================ FILE: docs/docsearch.js ================================================ $(function() { // register a handler to move the focus to the search bar // upon pressing shift + "/" (i.e. 
"?") $(document).on('keydown', function(e) { if (e.shiftKey && e.keyCode == 191) { e.preventDefault(); $("#search-input").focus(); } }); $(document).ready(function() { // do keyword highlighting /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ var mark = function() { var referrer = document.URL ; var paramKey = "q" ; if (referrer.indexOf("?") !== -1) { var qs = referrer.substr(referrer.indexOf('?') + 1); var qs_noanchor = qs.split('#')[0]; var qsa = qs_noanchor.split('&'); var keyword = ""; for (var i = 0; i < qsa.length; i++) { var currentParam = qsa[i].split('='); if (currentParam.length !== 2) { continue; } if (currentParam[0] == paramKey) { keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); } } if (keyword !== "") { $(".contents").unmark({ done: function() { $(".contents").mark(keyword); } }); } } }; mark(); }); }); /* Search term highlighting ------------------------------*/ function matchedWords(hit) { var words = []; var hierarchy = hit._highlightResult.hierarchy; // loop to fetch from lvl0, lvl1, etc. for (var idx in hierarchy) { words = words.concat(hierarchy[idx].matchedWords); } var content = hit._highlightResult.content; if (content) { words = words.concat(content.matchedWords); } // return unique words var words_uniq = [...new Set(words)]; return words_uniq; } function updateHitURL(hit) { var words = matchedWords(hit); var url = ""; if (hit.anchor) { url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; } else { url = hit.url + '?q=' + escape(words.join(" ")); } return url; } ================================================ FILE: docs/index.html ================================================ Tools for Computation on Batch Systems • batchtools

    As a successor of the packages BatchJobs and BatchExperiments, batchtools provides a parallel implementation of Map for high performance computing systems managed by schedulers like Slurm, Sun Grid Engine, OpenLava, TORQUE/OpenPBS, Load Sharing Facility (LSF) or Docker Swarm (see the setup section in the vignette).

    Main features:

    • Convenience: All relevant batch system operations (submitting, listing, killing) are either handled internally or abstracted via simple R functions
    • Portability: With a well-defined interface, the source is independent from the underlying batch system - prototype locally, deploy on any high performance cluster
    • Reproducibility: Every computational part has an associated seed stored in a data base which ensures reproducibility even when the underlying batch system changes
    • Abstraction: The code layers for algorithms, experiment definitions and execution are cleanly separated and allow to write readable and maintainable code to manage large scale computer experiments

    Installation

    Install the stable version from CRAN:

    install.packages("batchtools")

    For the development version, use devtools:

    devtools::install_github("mllg/batchtools")

    Next, you need to set up batchtools for your HPC (it will run sequentially otherwise). See the vignette for instructions.

    Why batchtools?

    The development of BatchJobs and BatchExperiments is discontinued for the following reasons:

    • Maintainability: The packages BatchJobs and BatchExperiments are tightly connected which makes maintenance difficult. Changes have to be synchronized and tested against the current CRAN versions for compatibility. Furthermore, BatchExperiments violates CRAN policies by calling internal functions of BatchJobs.
    • Data base issues: Although we invested weeks to mitigate issues with locks of the SQLite data base or file system (staged queries, file system timeouts, …), BatchJobs kept working unreliably on some systems with high latency under certain conditions. This made BatchJobs unusable for many users.

    BatchJobs and BatchExperiments will remain on CRAN, but new features are unlikely to be ported back. The vignette contains a section comparing the packages.

    Resources

    Citation

    Please cite the JOSS paper using the following BibTeX entry:

    @article{,
      doi = {10.21105/joss.00135},
      url = {https://doi.org/10.21105/joss.00135},
      year  = {2017},
      month = {feb},
      publisher = {The Open Journal},
      volume = {2},
      number = {10},
      author = {Michel Lang and Bernd Bischl and Dirk Surmann},
      title = {batchtools: Tools for R to work on batch systems},
      journal = {The Journal of Open Source Software}
    }

    Contributing to batchtools

    This R package is licensed under the LGPL-3. If you encounter problems using this software (lack of documentation, misleading or wrong documentation, unexpected behaviour, bugs, …) or just want to suggest features, please open an issue in the issue tracker. Pull requests are welcome and will be included at the discretion of the author. If you have customized a template file for your (larger) computing site, please share it: fork the repository, place your template in inst/templates and send a pull request.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/news/index.html ================================================ Changelog • batchtools

    batchtools 0.9.14 Unreleased

    • batchMap() now supports unnamed more.args.
    • Exports are now assigned with delayedAssign().
    • Fix an option in the LSF template.

    batchtools 0.9.13 2020-03-19

    • Maintenance release for R-4.0.0.

    batchtools 0.9.12 2020-01-10

    • Moved data.table from Depends to Imports. User scripts might need to explicitly attach data.table via library() now.
    • Fixes for ClusterFunctionsMulticore.
    • Removed a workaround for system2() for R-devel (to be released as R-4.0.0).
    • New configuration option compress to select the compression algorithm (passed down to saveRDS()).

    batchtools 0.9.11 2018-08-16

    • Removed deprecated function chunkIds().
    • New default for argument fs.timeout in the cluster function constructor is 0 (was NA before).
    • Fixed a unit test for OSX.
    • Improved stability and documentation.
    • Fixed memory usage calculation.

    batchtools 0.9.10 2018-05-19

    • Exported functions findConfFile() and findTemplateFile().
    • Dropped support for providing a template file directly as string. A valid file is now always required.
    • Fixed writing to TMPDIR instead of the R session’s temporary directory.

    batchtools 0.9.9 2018-05-14

    • RDS files are explicitly stored in version 2 to ensure backward compatibility with R versions prior to 3.5.0.
    • Package fs is now used internally for all file system operations.
    • Support for per-site configuration files and templates to be set up by system administrators.
    • The print of getStatus() now includes a time stamp.
    • chunk() now optionally shuffles the ids before chunking.
    • Support for setting per-job resources in submitJobs().
    • Example templates now include resources for blas.threads and omp.threads.
    • Some bug fixes regarding read-only registries.

    batchtools 0.9.8 2017-12-15

    • Renamed column “memory” in the status table to “mem.used” to avoid name clashes with the resource specification.
    • Exported function assertRegistry().
    • New function unwrap() as alias to flatten(). The latter causes a name clash with package purrr and will be deprecated in a future version.
    • Registries now contain a unique hash which is updated each time the registry is altered. Can be utilized to invalidate caches, e.g. the cache of knitr.

    batchtools 0.9.7 2017-11-15

    • Added a workaround for a test to be compatible with testthat v2.0.0.
    • Better and more customizable handling of expired jobs in waitForJobs().
    • Package foreach is now supported for nested parallelization as an alternative to parallelMap.
    • Deprecated argument flatten has been removed.
    • New helper function flatten() to manually unnest/unwrap lists in data frames.
    • Removed functions getProblemIds() and getAlgorithmIds(). Instead, you can just access reg$problems or reg$algorithms, respectively.
    • The number of the maximum concurrent jobs can now also be controlled via setting resources.
    • Internal data base changes to speed up some operations. Old registries are updated on first load by loadRegistry().
    • Fixed a bug where the sleep mechanism between queries was not working.
    • Fixed a bug where submit errors on SLURM and TORQUE were not detected as temporary.

    batchtools 0.9.6 2017-09-06

    • Fixed a bug where the wrong problem was retrieved from the cache. This was only triggered for chunked jobs in combination with an ExperimentRegistry.

    batchtools 0.9.5 2017-08-18

    • Added a missing routine to upgrade registries created with batchtools prior to v0.9.3.
    • Fixed a bug where the registry could not be synced if jobs failed during initialization (#135).
    • The sleep duration for waitForJobs() and submitJobs() can now be set via the configuration file.
    • A new heuristic will try to detect if the registry has been altered by a simultaneously running R session. If this is detected, the registry in the current session will be set to a read-only state.
    • waitForJobs() has been reworked to allow control over the heuristic to detect expired jobs. Jobs are treated as expired if they have been submitted but are not detected on the system for expire.after iterations (default 3 iterations, before 1 iteration).
    • New argument writeable for loadRegistry() to allow loading registries explicitly as read-only.
    • Removed argument update.paths from loadRegistry(). Paths are always updated, but the registry on the file system remains unchanged unless loaded in read-write mode.
    • ClusterFunctionsSlurm now come with an experimental nodename argument. If set, all communication with the master is handled via SSH which effectively allows you to submit jobs from your local machine instead of the head node. Note that mounting the file system (e.g., via SSHFS) is mandatory.

    batchtools 0.9.4 2017-08-07

    • Fixed handling of file.dir with special chars like whitespace.
    • All backward slashes will now be converted to forward slashes on windows.
    • Fixed order of arguments in findExperiments() (argument ids is now first).
    • Removed code to upgrade registries created with versions prior to v0.9.0 (first CRAN release).
    • addExperiments() now warns if a design is passed as data.frame with factor columns and stringsAsFactors is TRUE.
    • Added functions setJobNames() and getJobNames() to control the name of jobs on batch systems. Templates should be adapted to use job.name instead of job.hash for naming.
    • Argument flatten of getJobResources(), getJobPars() and getJobTable() is deprecated and will be removed. Future versions of the functions will behave like flatten is set to FALSE explicitly. Single resources/parameters must be extracted manually (or with tidyr::unnest()).

    batchtools 0.9.3 2017-04-21

    • Running jobs now are also included while querying for status “started”. This affects findStarted(), findNotStarted() and getStatus().
    • findExperiments() now performs an exact string match (instead of matching substrings) for patterns specified via prob.name and algo.name. For substring matching, use prob.pattern or algo.pattern, respectively.
    • Changed arguments for reduceResultsDataTable()
      • Removed fill, now is always TRUE
      • Introduced flatten to control if the result should be represented as a column of lists or flattened as separate columns. Defaults to a backward-compatible heuristic, similar to getJobPars.
    • Improved heuristic to lookup template files. Templates shipped with the package can now be used by providing just the file name (w/o extension).
    • Updated CITATION

    batchtools 0.9.2 2017-02-20

    • Full support for array jobs on Slurm and TORQUE.
    • Array jobs have been disabled for SGE and LSF (due to missing information about the output format) but will be re-enabled in a future release. Note that the variable n.array.jobs has been removed from JobCollection in favor of the new variable array.jobs (logical).
    • findExperiments() now has two additional arguments to match using regular expressions. The possibility to prefix a string with “~” to enable regular expression matching has been removed.
    • New function batchReduce().
    • New function estimateRuntimes().
    • New function removeRegistry().
    • Missing result files are now handled more consistently, raising an exception by default if the result is not available. The argument missing.val has been added to reduceResultsList() and reduceResultsDataTable() and removed from loadResult() and batchMapResults().
    • Timestamps are now stored with sub-second accuracy.
    • Renamed Torque to TORQUE. This especially affects the constructor makeClusterFunctionsTorque which now must be called via makeClusterFunctionsTORQUE()
    • chunkIds() has been deprecated. Use chunk(), lpt() or binpack() instead.
    • Fixed listing of jobs for ClusterFunctionsLSF and ClusterFunctionsOpenLava (thanks to @phaverty).
    • Job hashes are now prefixed with the literal string ‘job’ to ensure they start with a letter as required by some SGE systems.
    • Fixed handling of NULL results in reduceResultsList()
    • Fixed key lookup heuristic in join functions.
    • Fixed a bug where getJobTable() returned difftimes with the wrong unit (e.g., in minutes instead of seconds).
    • Deactivated swap allocation for ClusterFunctionsDocker.
    • The package is now more patient while communicating with the scheduler or file system by using a timeout-based approach. This should make the package more reliable and robust under heavy load.

    batchtools 0.9.0 2016-11-08

    Initial CRAN release. See the vignette for a brief comparison with BatchJobs/BatchExperiments.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/pkgdown.css ================================================ /* Sticky footer */ /** * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css * * .Site -> body > .container * .Site-content -> body > .container .row * .footer -> footer * * Key idea seems to be to ensure that .container and __all its parents__ * have height set to 100% * */ html, body { height: 100%; } body { position: relative; } body > .container { display: flex; height: 100%; flex-direction: column; } body > .container .row { flex: 1 0 auto; } footer { margin-top: 45px; padding: 35px 0 36px; border-top: 1px solid #e5e5e5; color: #666; display: flex; flex-shrink: 0; } footer p { margin-bottom: 0; } footer div { flex: 1; } footer .pkgdown { text-align: right; } footer p { margin-bottom: 0; } img.icon { float: right; } img { max-width: 100%; } /* Fix bug in bootstrap (only seen in firefox) */ summary { display: list-item; } /* Typographic tweaking ---------------------------------*/ .contents .page-header { margin-top: calc(-60px + 1em); } dd { margin-left: 3em; } /* Section anchors ---------------------------------*/ a.anchor { margin-left: -30px; display:inline-block; width: 30px; height: 30px; visibility: hidden; background-image: url(./link.svg); background-repeat: no-repeat; background-size: 20px 20px; background-position: center center; } .hasAnchor:hover a.anchor { visibility: visible; } @media (max-width: 767px) { .hasAnchor:hover a.anchor { visibility: hidden; } } /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { padding-top: 60px; margin-top: -40px; } /* Navbar submenu --------------------------*/ .dropdown-submenu { position: relative; } .dropdown-submenu>.dropdown-menu { top: 0; left: 100%; margin-top: -6px; margin-left: -1px; 
border-radius: 0 6px 6px 6px; } .dropdown-submenu:hover>.dropdown-menu { display: block; } .dropdown-submenu>a:after { display: block; content: " "; float: right; width: 0; height: 0; border-color: transparent; border-style: solid; border-width: 5px 0 5px 5px; border-left-color: #cccccc; margin-top: 5px; margin-right: -10px; } .dropdown-submenu:hover>a:after { border-left-color: #ffffff; } .dropdown-submenu.pull-left { float: none; } .dropdown-submenu.pull-left>.dropdown-menu { left: -100%; margin-left: 10px; border-radius: 6px 0 6px 6px; } /* Sidebar --------------------------*/ #pkgdown-sidebar { margin-top: 30px; position: -webkit-sticky; position: sticky; top: 70px; } #pkgdown-sidebar h2 { font-size: 1.5em; margin-top: 1em; } #pkgdown-sidebar h2:first-child { margin-top: 0; } #pkgdown-sidebar .list-unstyled li { margin-bottom: 0.5em; } /* bootstrap-toc tweaks ------------------------------------------------------*/ /* All levels of nav */ nav[data-toggle='toc'] .nav > li > a { padding: 4px 20px 4px 6px; font-size: 1.5rem; font-weight: 400; color: inherit; } nav[data-toggle='toc'] .nav > li > a:hover, nav[data-toggle='toc'] .nav > li > a:focus { padding-left: 5px; color: inherit; border-left: 1px solid #878787; } nav[data-toggle='toc'] .nav > .active > a, nav[data-toggle='toc'] .nav > .active:hover > a, nav[data-toggle='toc'] .nav > .active:focus > a { padding-left: 5px; font-size: 1.5rem; font-weight: 400; color: inherit; border-left: 2px solid #878787; } /* Nav: second level (shown on .active) */ nav[data-toggle='toc'] .nav .nav { display: none; /* Hide by default, but at >768px, show it */ padding-bottom: 10px; } nav[data-toggle='toc'] .nav .nav > li > a { padding-left: 16px; font-size: 1.35rem; } nav[data-toggle='toc'] .nav .nav > li > a:hover, nav[data-toggle='toc'] .nav .nav > li > a:focus { padding-left: 15px; } nav[data-toggle='toc'] .nav .nav > .active > a, nav[data-toggle='toc'] .nav .nav > .active:hover > a, nav[data-toggle='toc'] .nav .nav > 
.active:focus > a { padding-left: 15px; font-weight: 500; font-size: 1.35rem; } /* orcid ------------------------------------------------------------------- */ .orcid { font-size: 16px; color: #A6CE39; /* margins are required by official ORCID trademark and display guidelines */ margin-left:4px; margin-right:4px; vertical-align: middle; } /* Reference index & topics ----------------------------------------------- */ .ref-index th {font-weight: normal;} .ref-index td {vertical-align: top; min-width: 100px} .ref-index .icon {width: 40px;} .ref-index .alias {width: 40%;} .ref-index-icons .alias {width: calc(40% - 40px);} .ref-index .title {width: 60%;} .ref-arguments th {text-align: right; padding-right: 10px;} .ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} .ref-arguments .name {width: 20%;} .ref-arguments .desc {width: 80%;} /* Nice scrolling for wide elements --------------------------------------- */ table { display: block; overflow: auto; } /* Syntax highlighting ---------------------------------------------------- */ pre { word-wrap: normal; word-break: normal; border: 1px solid #eee; } pre, code { background-color: #f8f8f8; color: #333; } pre code { overflow: auto; word-wrap: normal; white-space: pre; } pre .img { margin: 5px 0; } pre .img img { background-color: #fff; display: block; height: auto; } code a, pre a { color: #375f84; } a.sourceLine:hover { text-decoration: none; } .fl {color: #1514b5;} .fu {color: #000000;} /* function */ .ch,.st {color: #036a07;} /* string */ .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ .message { color: black; font-weight: bolder;} .error { color: orange; font-weight: bolder;} .warning { color: #6A0366; font-weight: bolder;} /* Clipboard --------------------------*/ .hasCopyButton { position: relative; } .btn-copy-ex { position: absolute; right: 0; top: 0; visibility: hidden; } .hasCopyButton:hover button.btn-copy-ex { visibility: visible; } /* headroom.js 
------------------------ */ .headroom { will-change: transform; transition: transform 200ms linear; } .headroom--pinned { transform: translateY(0%); } .headroom--unpinned { transform: translateY(-100%); } /* mark.js ----------------------------*/ mark { background-color: rgba(255, 255, 51, 0.5); border-bottom: 2px solid rgba(255, 153, 51, 0.3); padding: 1px; } /* vertical spacing after htmlwidgets */ .html-widget { margin-bottom: 10px; } /* fontawesome ------------------------ */ .fab { font-family: "Font Awesome 5 Brands" !important; } /* don't display links in code chunks when printing */ /* source: https://stackoverflow.com/a/10781533 */ @media print { code a:link:after, code a:visited:after { content: ""; } } ================================================ FILE: docs/pkgdown.js ================================================ /* http://gregfranko.com/blog/jquery-best-practices/ */ (function($) { $(function() { $('.navbar-fixed-top').headroom(); $('body').css('padding-top', $('.navbar').height() + 10); $(window).resize(function(){ $('body').css('padding-top', $('.navbar').height() + 10); }); $('[data-toggle="tooltip"]').tooltip(); var cur_path = paths(location.pathname); var links = $("#navbar ul li a"); var max_length = -1; var pos = -1; for (var i = 0; i < links.length; i++) { if (links[i].getAttribute("href") === "#") continue; // Ignore external links if (links[i].host !== location.host) continue; var nav_path = paths(links[i].pathname); var length = prefix_length(nav_path, cur_path); if (length > max_length) { max_length = length; pos = i; } } // Add class to parent
  • , and enclosing
  • if in dropdown if (pos >= 0) { var menu_anchor = $(links[pos]); menu_anchor.parent().addClass("active"); menu_anchor.closest("li.dropdown").addClass("active"); } }); function paths(pathname) { var pieces = pathname.split("/"); pieces.shift(); // always starts with / var end = pieces[pieces.length - 1]; if (end === "index.html" || end === "") pieces.pop(); return(pieces); } // Returns -1 if not found function prefix_length(needle, haystack) { if (needle.length > haystack.length) return(-1); // Special case for length-0 haystack, since for loop won't run if (haystack.length === 0) { return(needle.length === 0 ? 0 : -1); } for (var i = 0; i < haystack.length; i++) { if (needle[i] != haystack[i]) return(i); } return(haystack.length); } /* Clipboard --------------------------*/ function changeTooltipMessage(element, msg) { var tooltipOriginalTitle=element.getAttribute('data-original-title'); element.setAttribute('data-original-title', msg); $(element).tooltip('show'); element.setAttribute('data-original-title', tooltipOriginalTitle); } if(ClipboardJS.isSupported()) { $(document).ready(function() { var copyButton = ""; $(".examples, div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); // Initialize tooltips: $('.btn-copy-ex').tooltip({container: 'body'}); // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { return trigger.parentNode.textContent; } }); clipboardBtnCopies.on('success', function(e) { changeTooltipMessage(e.trigger, 'Copied!'); e.clearSelection(); }); clipboardBtnCopies.on('error', function() { changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); }); }); } })(window.jQuery || window.$) ================================================ FILE: docs/pkgdown.yml ================================================ pandoc: 2.11.0.2 pkgdown: 1.6.1 pkgdown_sha: ~ articles: batchtools: batchtools.html last_built: 2020-10-21T07:39Z 
================================================ FILE: docs/reference/JobCollection.html ================================================ JobCollection Constructor — makeJobCollection • batchtools

    makeJobCollection takes multiple job ids and creates an object of class “JobCollection” which holds all necessary information for the calculation with doJobCollection. It is implemented as an environment with the following variables:

    file.dir

    file.dir of the Registry.

    work.dir:

    work.dir of the Registry.

    job.hash

    Unique identifier of the job. Used to create names on the file system.

    jobs

    data.table holding individual job information. See examples.

    log.file

    Location of the designated log file for this job.

    resources:

    Named list of specified computational resources.

    uri

    Location of the job description file (saved with saveRDS) on the file system.

    seed

    integer(1) Seed of the Registry.

    packages

    character with required packages to load via require.

    namespaces

    character with required packages to load via requireNamespace.

    source

    character with list of files to source before execution.

    load

    character with list of files to load before execution.

    array.var

    character(1) of the array environment variable specified by the cluster functions.

    array.jobs

    logical(1) signaling if jobs were submitted using chunks.as.arrayjobs.

    If your ClusterFunctions uses a template, brew will be executed in the environment of such a collection. Thus all variables available inside the job can be used in the template.

    makeJobCollection(ids = NULL, resources = list(), reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    resources

    [list]
    Named list of resources. Default is list().

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [JobCollection].

    See also

    Other JobCollection: doJobCollection()

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE, packages = "methods")
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(identity, 1:5, reg = tmp)
    #> Adding 5 jobs ...
    # resources are usually set in submitJobs() jc = makeJobCollection(1:3, resources = list(foo = "bar"), reg = tmp) ls(jc)
    #> [1] "array.jobs" "array.var" "compress" "file.dir" "job.hash" #> [6] "job.name" "jobs" "load" "log.file" "namespaces" #> [11] "packages" "resources" "seed" "source" "uri" #> [16] "work.dir"
    jc$resources
    #> $foo #> [1] "bar" #>

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/JobExperiment.html ================================================ Jobs and Experiments — makeJob • batchtools

    Jobs and Experiments are abstract objects which hold all information necessary to execute a single computational job for a Registry or ExperimentRegistry, respectively.

    They can be created using the constructor makeJob which takes a single job id. Jobs and Experiments are passed to reduce functions like reduceResults. Furthermore, Experiments can be used in the functions of the Problem and Algorithm. Jobs and Experiments hold the following information:

    job.id

    Job ID as integer.

    pars

    Job parameters as named list. For ExperimentRegistry, the parameters are divided into the sublists “prob.pars” and “algo.pars”.

    seed

    Seed which is set via doJobCollection as scalar integer.

    resources

    Computational resources which were set for this job as named list.

    external.dir

    Path to a directory which is created exclusively for this job. You can store external files here. Directory is persistent between multiple restarts of the job and can be cleaned by calling resetJobs.

    fun

    Job only: User function passed to batchMap.

    prob.name

    Experiments only: Problem id.

    algo.name

    Experiments only: Algorithm id.

    problem

    Experiments only: Problem.

    instance

    Experiments only: Problem instance.

    algorithm

    Experiments only: Algorithm.

    repl

    Experiments only: Replication number.

    Note that the slots “pars”, “fun”, “algorithm” and “problem” lazy-load required files from the file system and construct the object on the first access. The realizations are cached for all slots except “instance” (which might be stochastic).

    Jobs and Experiments can be executed manually with execJob.

    makeJob(id, reader = NULL, reg = getDefaultRegistry())

    Arguments

    id

    [integer(1) or data.table]
    Single integer to specify the job or a data.table with column job.id and exactly one row.

    reader

    [RDSReader | NULL]
    Reader object to retrieve files. Used internally to cache reading from the file system. The default (NULL) does not make use of caching.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [Job | Experiment].

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(function(x, y) x + y, x = 1:2, more.args = list(y = 99), reg = tmp)
    #> Adding 2 jobs ...
    submitJobs(resources = list(foo = "bar"), reg = tmp)
    #> Submitting 2 jobs in 2 chunks using cluster functions 'Interactive' ...
    job = makeJob(1, reg = tmp) print(job)
    #> <Job> #> Inherits from: <BaseJob> #> Public: #> external.dir: active binding #> file.dir: /tmp/batchtools-example/reg #> fun: active binding #> id: 1 #> initialize: function (file.dir, reader, id, job.pars, seed, resources) #> job.id: active binding #> job.pars: list #> pars: active binding #> reader: RDSReader, R6 #> resources: list #> seed: 15284
    # Get the parameters: job$pars
    #> $x #> [1] 1 #> #> $y #> [1] 99 #>
    # Get the job resources: job$resources
    #> $foo #> [1] "bar" #>
    # Execute the job locally: execJob(job)
    #> ### [bt]: Setting seed to 15284 ...
    #> [1] 100

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/JobNames.html ================================================ Set and Retrieve Job Names — JobNames • batchtools

    Set custom names for jobs. These are passed to the template as ‘job.name’. If no custom name is set (or any of the job names of the chunk is missing), the job hash is used as job name. Individual job names can be accessed via jobs$job.name.

    setJobNames(ids = NULL, names, reg = getDefaultRegistry())
    
    getJobNames(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    names

    [character]
    Character vector of the same length as provided ids.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    setJobNames returns NULL invisibly, getJobNames returns a data.table with columns job.id and job.name.

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    ids = batchMap(identity, 1:10, reg = tmp)
    #> Adding 10 jobs ...
    setJobNames(ids, letters[1:nrow(ids)], reg = tmp) getJobNames(reg = tmp)
    #> job.id job.name #> 1: 1 a #> 2: 2 b #> 3: 3 c #> 4: 4 d #> 5: 5 e #> 6: 6 f #> 7: 7 g #> 8: 8 h #> 9: 9 i #> 10: 10 j

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/JoinTables.html ================================================ Inner, Left, Right, Outer, Semi and Anti Join for Data Tables — JoinTables • batchtools

    These helper functions perform join operations on data tables. Most of them are basically one-liners. See https://rpubs.com/ronasta/join_data_tables for an overview of join operations in data table or alternatively dplyr's vignette on two table verbs.

    ijoin(x, y, by = NULL)
    
    ljoin(x, y, by = NULL)
    
    rjoin(x, y, by = NULL)
    
    ojoin(x, y, by = NULL)
    
    sjoin(x, y, by = NULL)
    
    ajoin(x, y, by = NULL)
    
    ujoin(x, y, all.y = FALSE, by = NULL)

    Arguments

    x

    [data.frame]
    First data.frame to join.

    y

    [data.frame]
    Second data.frame to join.

    by

    [character]
    Column name(s) of variables used to match rows in x and y. If not provided, a heuristic similar to the one described in the dplyr vignette is used:

    1. If x is keyed, the existing key will be used if y has the same column(s).

    2. If x is not keyed, the intersect of common columns names is used if not empty.

    3. Raise an exception.

    You may pass a named character vector to merge on columns with different names in x and y: by = c("x.id" = "y.id") will match x's “x.id” column with y's “y.id” column.

    all.y

    [logical(1)]
    Keep columns of y which are not in x?

    Value

    [data.table] with key identical to by.

    Examples

    batchtools:::example_push_temp(1) # Create two tables for demonstration tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(identity, x = 1:6, reg = tmp)
    #> Adding 6 jobs ...
    x = getJobPars(reg = tmp) y = findJobs(x >= 2 & x <= 5, reg = tmp) y$extra.col = head(letters, nrow(y)) # Inner join: similar to intersect(): keep all columns of x and y with common matches ijoin(x, y)
    #> job.id job.pars extra.col #> 1: 2 <list[1]> a #> 2: 3 <list[1]> b #> 3: 4 <list[1]> c #> 4: 5 <list[1]> d
    # Left join: use all ids from x, keep all columns of x and y ljoin(x, y)
    #> job.id extra.col job.pars #> 1: 1 <NA> <list[1]> #> 2: 2 a <list[1]> #> 3: 3 b <list[1]> #> 4: 4 c <list[1]> #> 5: 5 d <list[1]> #> 6: 6 <NA> <list[1]>
    # Right join: use all ids from y, keep all columns of x and y rjoin(x, y)
    #> job.id job.pars extra.col #> 1: 2 <list[1]> a #> 2: 3 <list[1]> b #> 3: 4 <list[1]> c #> 4: 5 <list[1]> d
    # Outer join: similar to union(): keep all columns of x and y with matches in x or y ojoin(x, y)
    #> job.id job.pars extra.col #> 1: 1 <list[1]> <NA> #> 2: 2 <list[1]> a #> 3: 3 <list[1]> b #> 4: 4 <list[1]> c #> 5: 5 <list[1]> d #> 6: 6 <list[1]> <NA>
    # Semi join: filter x with matches in y sjoin(x, y)
    #> job.id job.pars #> 1: 2 <list[1]> #> 2: 3 <list[1]> #> 3: 4 <list[1]> #> 4: 5 <list[1]>
    # Anti join: filter x with matches not in y ajoin(x, y)
    #> job.id job.pars #> 1: 1 <list[1]> #> 2: 6 <list[1]>
    # Updating join: Replace values in x with values in y ujoin(x, y)
    #> job.id job.pars #> 1: 1 <list[1]> #> 2: 2 <list[1]> #> 3: 3 <list[1]> #> 4: 4 <list[1]> #> 5: 5 <list[1]> #> 6: 6 <list[1]>

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/Tags.html ================================================ Add or Remove Job Tags — Tags • batchtools

    Add and remove arbitrary tags to jobs.

    addJobTags(ids = NULL, tags, reg = getDefaultRegistry())
    
    removeJobTags(ids = NULL, tags, reg = getDefaultRegistry())
    
    getUsedJobTags(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    tags

    [character]
    Tags to add or remove as strings. Each tag may consist of letters, numbers, underscore and dots (pattern “^[[:alnum:]_.]+”).

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with job ids affected (invisible).

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    ids = batchMap(sqrt, x = -3:3, reg = tmp)
    #> Adding 7 jobs ...
    # Add new tag to all ids addJobTags(ids, "needs.computation", reg = tmp) getJobTags(reg = tmp)
    #> job.id tags #> 1: 1 needs.computation #> 2: 2 needs.computation #> 3: 3 needs.computation #> 4: 4 needs.computation #> 5: 5 needs.computation #> 6: 6 needs.computation #> 7: 7 needs.computation
    # Add more tags addJobTags(findJobs(x < 0, reg = tmp), "x.neg", reg = tmp) addJobTags(findJobs(x > 0, reg = tmp), "x.pos", reg = tmp) getJobTags(reg = tmp)
    #> job.id tags #> 1: 1 needs.computation,x.neg #> 2: 2 needs.computation,x.neg #> 3: 3 needs.computation,x.neg #> 4: 4 needs.computation #> 5: 5 needs.computation,x.pos #> 6: 6 needs.computation,x.pos #> 7: 7 needs.computation,x.pos
    # Submit first 5 jobs and remove tag if successful ids = submitJobs(1:5, reg = tmp)
    #> Submitting 5 jobs in 5 chunks using cluster functions 'Interactive' ...
    #> Warning: NaNs produced
    #> Warning: NaNs produced
    #> Warning: NaNs produced
    if (waitForJobs(reg = tmp)) removeJobTags(ids, "needs.computation", reg = tmp) getJobTags(reg = tmp)
    #> job.id tags #> 1: 1 x.neg #> 2: 2 x.neg #> 3: 3 x.neg #> 4: 4 <NA> #> 5: 5 x.pos #> 6: 6 needs.computation,x.pos #> 7: 7 needs.computation,x.pos
    # Grep for warning message and add a tag addJobTags(grepLogs(pattern = "NaNs produced", reg = tmp), "div.zero", reg = tmp) getJobTags(reg = tmp)
    #> job.id tags #> 1: 1 x.neg #> 2: 2 x.neg #> 3: 3 x.neg #> 4: 4 <NA> #> 5: 5 x.pos #> 6: 6 needs.computation,x.pos #> 7: 7 needs.computation,x.pos
    # All tags where tag x.neg is set: ids = findTagged("x.neg", reg = tmp) getUsedJobTags(ids, reg = tmp)
    #> [1] "x.neg"

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/Worker.html ================================================ Create a Linux-Worker — Worker • batchtools

    R6Class to create local and remote linux workers.

    Format

    An R6Class generator object

    Value

    [Worker].

    Fields

    nodename

    Host name. Set via constructor.

    ncpus

    Number of CPUs. Set via constructor and defaults to a heuristic which tries to detect the number of CPUs of the machine.

    max.load

    Maximum load average (of the last 5 min). Set via constructor and defaults to the number of CPUs of the machine.

    status

    Status of the worker; one of “unknown”, “available”, “max.cpus” and “max.load”.

    Methods

    new(nodename, ncpus, max.load)

    Constructor.

    update(reg)

    Update the worker status.

    list(reg)

    List running jobs.

    start(reg, fn, outfile)

    Start job collection in file “fn” and output to “outfile”.

    kill(reg, batch.id)

    Kill job matching the “batch.id”.

    Examples

    if (FALSE) { # create a worker for the local machine and use 4 CPUs. Worker$new("localhost", ncpus = 4) }

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/addAlgorithm.html ================================================ Define Algorithms for Experiments — addAlgorithm • batchtools

    Algorithms are functions which get the data part as well as the problem instance (the return value of the function defined in Problem) and return an arbitrary R object.

    This function serializes all components to the file system and registers the algorithm in the ExperimentRegistry.

    removeAlgorithms removes all jobs from the registry which depend on the specific algorithm. reg$algorithms holds the IDs of already defined algorithms.

    addAlgorithm(name, fun = NULL, reg = getDefaultRegistry())
    
    removeAlgorithms(name, reg = getDefaultRegistry())

    Arguments

    name

    [character(1)]
    Unique identifier for the algorithm.

    fun

    [function]
    The algorithm function. The static problem part is passed as “data”, the generated problem instance is passed as “instance” and the Job/Experiment as “job”. Therefore, your function must have the formal arguments “job”, “data” and “instance” (or dots ...).

    If you do not provide a function, it defaults to a function which just returns the instance.

    reg

    [ExperimentRegistry]
    Registry. If not explicitly passed, uses the last created registry.

    Value

    [Algorithm]. Object of class “Algorithm”.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/addExperiments.html ================================================ Add Experiments to the Registry — addExperiments • batchtools

    Adds experiments (parametrized combinations of problems with algorithms) to the registry and thereby defines batch jobs.

    If multiple problem designs or algorithm designs are provided, they are combined via the Cartesian product. E.g., if you have two problems p1 and p2 and three algorithms a1, a2 and a3, addExperiments creates experiments for all parameters for the combinations (p1, a1), (p1, a2), (p1, a3), (p2, a1), (p2, a2) and (p2, a3).

    addExperiments(
      prob.designs = NULL,
      algo.designs = NULL,
      repls = 1L,
      combine = "crossprod",
      reg = getDefaultRegistry()
    )

    Arguments

    prob.designs

    [named list of data.frame]
    Named list of data frames (or data.table). The name must match the problem name while the column names correspond to parameters of the problem. If NULL, experiments for all defined problems without any parameters are added.

    algo.designs

    [named list of data.table or data.frame]
    Named list of data frames (or data.table). The name must match the algorithm name while the column names correspond to parameters of the algorithm. If NULL, experiments for all defined algorithms without any parameters are added.

    repls

    [integer(1)]
    Number of replications for each experiment.

    combine

    [character(1)]
    How to combine the rows of a single problem design with the rows of a single algorithm design? Default is “crossprod” which combines each row of the problem design with each row of the algorithm design in a cross-product fashion. Set to “bind” to just cbind the tables of problem and algorithm designs where the shorter table is repeated if necessary.

    reg

    [ExperimentRegistry]
    Registry. If not explicitly passed, uses the last created registry.

    Value

    [data.table] with ids of added jobs stored in column “job.id”.

    Note

    R's data.frame converts character vectors to factors by default in R versions prior to 4.0.0 which frequently resulted in problems using addExperiments. Therefore, this function will warn about factor variables if the following conditions hold:

    1. R version is < 4.0.0

    2. The design is passed as a data.frame, not a data.table or tibble.

    3. The option “stringsAsFactors” is not set or set to TRUE.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    # add first problem fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd) addProblem("rnorm", fun = fun, reg = tmp)
    #> Adding problem 'rnorm'
    # add second problem fun = function(job, data, n, lambda, ...) rexp(n, rate = lambda) addProblem("rexp", fun = fun, reg = tmp)
    #> Adding problem 'rexp'
    # add first algorithm fun = function(instance, method, ...) if (method == "mean") mean(instance) else median(instance) addAlgorithm("average", fun = fun, reg = tmp)
    #> Adding algorithm 'average'
    # add second algorithm fun = function(instance, ...) sd(instance) addAlgorithm("deviation", fun = fun, reg = tmp)
    #> Adding algorithm 'deviation'
    # define problem and algorithm designs library(data.table) prob.designs = algo.designs = list() prob.designs$rnorm = CJ(n = 100, mean = -1:1, sd = 1:5) prob.designs$rexp = data.table(n = 100, lambda = 1:5) algo.designs$average = data.table(method = c("mean", "median")) algo.designs$deviation = data.table() # add experiments and submit addExperiments(prob.designs, algo.designs, reg = tmp)
    #> Adding 30 experiments ('rnorm'[15] x 'average'[2] x repls[1]) ...
    #> Adding 15 experiments ('rnorm'[15] x 'deviation'[1] x repls[1]) ...
    #> Adding 10 experiments ('rexp'[5] x 'average'[2] x repls[1]) ...
    #> Adding 5 experiments ('rexp'[5] x 'deviation'[1] x repls[1]) ...
    # check what has been created summarizeExperiments(reg = tmp)
    #> problem algorithm .count #> 1: rnorm average 30 #> 2: rnorm deviation 15 #> 3: rexp average 10 #> 4: rexp deviation 5
    unwrap(getJobPars(reg = tmp))
    #> job.id problem algorithm n mean sd lambda method #> 1: 1 rnorm average 100 -1 1 NA mean #> 2: 2 rnorm average 100 -1 1 NA median #> 3: 3 rnorm average 100 -1 2 NA mean #> 4: 4 rnorm average 100 -1 2 NA median #> 5: 5 rnorm average 100 -1 3 NA mean #> 6: 6 rnorm average 100 -1 3 NA median #> 7: 7 rnorm average 100 -1 4 NA mean #> 8: 8 rnorm average 100 -1 4 NA median #> 9: 9 rnorm average 100 -1 5 NA mean #> 10: 10 rnorm average 100 -1 5 NA median #> 11: 11 rnorm average 100 0 1 NA mean #> 12: 12 rnorm average 100 0 1 NA median #> 13: 13 rnorm average 100 0 2 NA mean #> 14: 14 rnorm average 100 0 2 NA median #> 15: 15 rnorm average 100 0 3 NA mean #> 16: 16 rnorm average 100 0 3 NA median #> 17: 17 rnorm average 100 0 4 NA mean #> 18: 18 rnorm average 100 0 4 NA median #> 19: 19 rnorm average 100 0 5 NA mean #> 20: 20 rnorm average 100 0 5 NA median #> 21: 21 rnorm average 100 1 1 NA mean #> 22: 22 rnorm average 100 1 1 NA median #> 23: 23 rnorm average 100 1 2 NA mean #> 24: 24 rnorm average 100 1 2 NA median #> 25: 25 rnorm average 100 1 3 NA mean #> 26: 26 rnorm average 100 1 3 NA median #> 27: 27 rnorm average 100 1 4 NA mean #> 28: 28 rnorm average 100 1 4 NA median #> 29: 29 rnorm average 100 1 5 NA mean #> 30: 30 rnorm average 100 1 5 NA median #> 31: 31 rnorm deviation 100 -1 1 NA <NA> #> 32: 32 rnorm deviation 100 -1 2 NA <NA> #> 33: 33 rnorm deviation 100 -1 3 NA <NA> #> 34: 34 rnorm deviation 100 -1 4 NA <NA> #> 35: 35 rnorm deviation 100 -1 5 NA <NA> #> 36: 36 rnorm deviation 100 0 1 NA <NA> #> 37: 37 rnorm deviation 100 0 2 NA <NA> #> 38: 38 rnorm deviation 100 0 3 NA <NA> #> 39: 39 rnorm deviation 100 0 4 NA <NA> #> 40: 40 rnorm deviation 100 0 5 NA <NA> #> 41: 41 rnorm deviation 100 1 1 NA <NA> #> 42: 42 rnorm deviation 100 1 2 NA <NA> #> 43: 43 rnorm deviation 100 1 3 NA <NA> #> 44: 44 rnorm deviation 100 1 4 NA <NA> #> 45: 45 rnorm deviation 100 1 5 NA <NA> #> 46: 46 rexp average 100 NA NA 1 mean #> 47: 47 rexp average 100 NA NA 1 median #> 
48: 48 rexp average 100 NA NA 2 mean #> 49: 49 rexp average 100 NA NA 2 median #> 50: 50 rexp average 100 NA NA 3 mean #> 51: 51 rexp average 100 NA NA 3 median #> 52: 52 rexp average 100 NA NA 4 mean #> 53: 53 rexp average 100 NA NA 4 median #> 54: 54 rexp average 100 NA NA 5 mean #> 55: 55 rexp average 100 NA NA 5 median #> 56: 56 rexp deviation 100 NA NA 1 <NA> #> 57: 57 rexp deviation 100 NA NA 2 <NA> #> 58: 58 rexp deviation 100 NA NA 3 <NA> #> 59: 59 rexp deviation 100 NA NA 4 <NA> #> 60: 60 rexp deviation 100 NA NA 5 <NA> #> job.id problem algorithm n mean sd lambda method

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/addProblem.html ================================================ Define Problems for Experiments — addProblem • batchtools

    Problems may consist of up to two parts: A static, immutable part (data in addProblem) and a dynamic, stochastic part (fun in addProblem). For example, for statistical learning problems a data frame would be the static problem part while a resampling function would be the stochastic part which creates the problem instance. This instance is then typically passed to a learning algorithm like a wrapper around a statistical model (fun in addAlgorithm).

    This function serializes all components to the file system and registers the problem in the ExperimentRegistry.

    removeProblems removes all jobs from the registry which depend on the specific problem. reg$problems holds the IDs of already defined problems.

    addProblem(
      name,
      data = NULL,
      fun = NULL,
      seed = NULL,
      cache = FALSE,
      reg = getDefaultRegistry()
    )
    
    removeProblems(name, reg = getDefaultRegistry())

    Arguments

    name

    [character(1)]
    Unique identifier for the problem.

    data

    [ANY]
    Static problem part. Default is NULL.

    fun

    [function]
    The function defining the stochastic problem part. The static part is passed to this function with name “data” and the Job/Experiment is passed as “job”. Therefore, your function must have the formal arguments “job” and “data” (or dots ...). If you do not provide a function, it defaults to a function which just returns the data part.

    seed

    [integer(1)]
    Start seed for this problem. This allows the “synchronization” of a stochastic problem across algorithms, so that different algorithms are evaluated on the same stochastic instance. If the problem seed is defined, the seeding mechanism works as follows: (1) Before the dynamic part of a problem is instantiated, the seed of the problem + [replication number] - 1 is set, i.e. the first replication uses the problem seed. (2) The stochastic part of the problem is instantiated. (3) From now on the usual experiment seed of the registry is used, see ExperimentRegistry. If seed is set to NULL (default), the job seed is used to instantiate the problem and different algorithms see different stochastic instances of the same problem.

    cache

    [logical(1)]
    If TRUE and seed is set, problem instances will be cached on the file system. This assumes that each problem instance is deterministic for each combination of hyperparameter setting and each replication number. This feature is experimental.

    reg

    [ExperimentRegistry]
    Registry. If not explicitly passed, uses the last created registry.

    Value

    [Problem]. Object of class “Problem” (invisibly).

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    addProblem("p1", fun = function(job, data) data, reg = tmp)
    #> Adding problem 'p1'
    addProblem("p2", fun = function(job, data) job, reg = tmp)
    #> Adding problem 'p2'
    addAlgorithm("a1", fun = function(job, data, instance) instance, reg = tmp)
    #> Adding algorithm 'a1'
    addExperiments(repls = 2, reg = tmp)
    #> Adding 2 experiments ('p1'[1] x 'a1'[1] x repls[2]) ...
    #> Adding 2 experiments ('p2'[1] x 'a1'[1] x repls[2]) ...
    # List problems, algorithms and job parameters: tmp$problems
    #> [1] "p1" "p2"
    tmp$algorithms
    #> [1] "a1"
    getJobPars(reg = tmp)
    #> job.id problem prob.pars algorithm algo.pars #> 1: 1 p1 <list[0]> a1 <list[0]> #> 2: 2 p1 <list[0]> a1 <list[0]> #> 3: 3 p2 <list[0]> a1 <list[0]> #> 4: 4 p2 <list[0]> a1 <list[0]>
    # Remove one problem removeProblems("p1", reg = tmp)
    #> Removing Problem 'p1' and 2 corresponding jobs ...
    # List problems and algorithms: tmp$problems
    #> [1] "p2"
    tmp$algorithms
    #> [1] "a1"
    getJobPars(reg = tmp)
    #> job.id problem prob.pars algorithm algo.pars #> 1: 3 p2 <list[0]> a1 <list[0]> #> 2: 4 p2 <list[0]> a1 <list[0]>

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/assertRegistry.html ================================================ assertRegistry — assertRegistry • batchtools

    Assert that a given object is a batchtools registry. Additionally can sync the registry, check if it is writeable, or check if jobs are running. If any check fails, throws an error indicating the reason for the failure.

    assertRegistry(
      reg,
      class = NULL,
      writeable = FALSE,
      sync = FALSE,
      running.ok = TRUE
    )

    Arguments

    reg

    [Registry]
    The object asserted to be a Registry.

    class

    [character(1)]
    If NULL (default), reg must only inherit from class “Registry”. Otherwise check that reg is of class class. E.g., if set to “Registry”, an ExperimentRegistry would not pass.

    writeable

    [logical(1)]
    Check if the registry is writeable.

    sync

    [logical(1)]
    Try to synchronize the registry by including pending results from the file system. See syncRegistry.

    running.ok

    [logical(1)]
    If FALSE throw an error if jobs associated with the registry are currently running.

    Value

    TRUE invisibly.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/batchExport.html ================================================ Export Objects to the Slaves — batchExport • batchtools

    Objects are saved in subdirectory “exports” of the “file.dir” of reg. They are automatically loaded and placed in the global environment each time the registry is loaded or a job collection is executed.

    batchExport(
      export = list(),
      unexport = character(0L),
      reg = getDefaultRegistry()
    )

    Arguments

    export

    [list]
    Named list of objects to export.

    unexport

    [character]
    Vector of object names to unexport.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with name and uri to the exported objects.

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    # list exports exports = batchExport(reg = tmp) print(exports)
    #> Empty data.table (0 rows and 2 cols): name,uri
    # add a job and required exports batchMap(function(x) x^2 + y + z, x = 1:3, reg = tmp)
    #> Adding 3 jobs ...
    exports = batchExport(export = list(y = 99, z = 1), reg = tmp)
    #> Exporting new objects: 'y','z' ...
    print(exports)
    #> name uri #> 1: y /tmp/batchtools-example/reg/exports/PE.rds #> 2: z /tmp/batchtools-example/reg/exports/PI.rds
    submitJobs(reg = tmp)
    #> Submitting 3 jobs in 3 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    stopifnot(loadResult(1, reg = tmp) == 101) # Un-export z exports = batchExport(unexport = "z", reg = tmp)
    #> Un-exporting exported objects: 'z' ...
    print(exports)
    #> name uri #> 1: y /tmp/batchtools-example/reg/exports/PE.rds

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/batchMap.html ================================================ Map Operation for Batch Systems — batchMap • batchtools

    A parallel and asynchronous Map/mapply for batch systems. Note that this function only defines the computational jobs. The actual computation is started with submitJobs. Results and partial results can be collected with reduceResultsList, reduceResults or loadResult.

    For a synchronous Map-like execution, see btmapply.

    batchMap(
      fun,
      ...,
      args = list(),
      more.args = list(),
      reg = getDefaultRegistry()
    )

    Arguments

    fun

    [function]
    Function to map over arguments provided via .... Parameters given via args or ... are passed as-is, in the respective order and possibly named. If the function has the named formal argument “.job”, the Job is passed to the function on the slave.

    ...

    [ANY]
    Arguments to vectorize over (list or vector). Shorter vectors will be recycled (possibly with a warning if any length is not a multiple of the longest length). Mutually exclusive with args. Note that although it is possible to iterate over large objects (e.g., lists of data frames or matrices), this usually hurts the overall performance and thus is discouraged.

    args

    [list | data.frame]
    Arguments to vectorize over as (named) list or data frame. Shorter vectors will be recycled (possibly with a warning if any length is not a multiple of the longest length). Mutually exclusive with ....

    more.args

    [list]
    A list of further arguments passed to fun. Default is an empty list.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with ids of added jobs stored in column “job.id”.

    See also

    Examples

    batchtools:::example_push_temp(3) # example using "..." and more.args tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg1' using cluster functions 'Interactive'
    f = function(x, y) x^2 + y ids = batchMap(f, x = 1:10, more.args = list(y = 100), reg = tmp)
    #> Adding 10 jobs ...
    getJobPars(reg = tmp)
    #> job.id job.pars #> 1: 1 <list[1]> #> 2: 2 <list[1]> #> 3: 3 <list[1]> #> 4: 4 <list[1]> #> 5: 5 <list[1]> #> 6: 6 <list[1]> #> 7: 7 <list[1]> #> 8: 8 <list[1]> #> 9: 9 <list[1]> #> 10: 10 <list[1]>
    testJob(6, reg = tmp) # 100 + 6^2 = 136
    #> ### [bt]: Setting seed to 12787 ...
    #> [1] 136
    # vector recycling tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg2' using cluster functions 'Interactive'
    f = function(...) list(...) ids = batchMap(f, x = 1:3, y = 1:6, reg = tmp)
    #> Adding 6 jobs ...
    getJobPars(reg = tmp)
    #> job.id job.pars #> 1: 1 <list[2]> #> 2: 2 <list[2]> #> 3: 3 <list[2]> #> 4: 4 <list[2]> #> 5: 5 <list[2]> #> 6: 6 <list[2]>
    # example for an expand.grid()-like operation on parameters tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg3' using cluster functions 'Interactive'
    ids = batchMap(paste, args = data.table::CJ(x = letters[1:3], y = 1:3), reg = tmp)
    #> Adding 9 jobs ...
    getJobPars(reg = tmp)
    #> job.id job.pars #> 1: 1 <list[2]> #> 2: 2 <list[2]> #> 3: 3 <list[2]> #> 4: 4 <list[2]> #> 5: 5 <list[2]> #> 6: 6 <list[2]> #> 7: 7 <list[2]> #> 8: 8 <list[2]> #> 9: 9 <list[2]>
    testJob(6, reg = tmp)
    #> ### [bt]: Setting seed to 8571 ...
    #> [1] "b 3"

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/batchMapResults.html ================================================ Map Over Results to Create New Jobs — batchMapResults • batchtools

    This function allows you to create new computational jobs (just like batchMap) based on the results of a Registry.

    batchMapResults(
      fun,
      ids = NULL,
      ...,
      more.args = list(),
      target,
      source = getDefaultRegistry()
    )

    Arguments

    fun

    [function]
    Function which takes the result as first (unnamed) argument.

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findDone. Invalid ids are ignored.

    ...

    [ANY]
    Arguments to vectorize over (list or vector). Passed to batchMap.

    more.args

    [list]
    A list of further arguments passed to fun. Default is an empty list.

    target

    [Registry]
    Empty Registry in which the new jobs are created.

    source

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with ids of jobs added to target.

    Note

    The URI to the result files in registry source is hard coded as parameter in the target registry. This means that target is currently not portable between systems for computation.

    See also

    Examples

    batchtools:::example_push_temp(2) # Source registry: calculate square of some numbers tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg1' using cluster functions 'Interactive'
    batchMap(function(x) list(square = x^2), x = 1:10, reg = tmp)
    #> Adding 10 jobs ...
    submitJobs(reg = tmp)
    #> Submitting 10 jobs in 10 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    # Target registry: calculate the square root on results of first registry target = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg2' using cluster functions 'Interactive'
    batchMapResults(fun = function(x, y) list(sqrt = sqrt(x$square)), ids = 4:8, target = target, source = tmp)
    #> Adding 5 jobs ...
    submitJobs(reg = target)
    #> Submitting 5 jobs in 5 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = target)
    #> [1] TRUE
    # Map old to new ids. First, get a table with results and parameters results = unwrap(rjoin(getJobPars(reg = target), reduceResultsDataTable(reg = target))) print(results)
    #> job.id .id sqrt #> 1: 1 4 4 #> 2: 2 5 5 #> 3: 3 6 6 #> 4: 4 7 7 #> 5: 5 8 8
    # Parameter '.id' points to job.id in 'source'. Use an inner join to combine: ijoin(results, unwrap(reduceResultsDataTable(reg = tmp)), by = c(".id" = "job.id"))
    #> job.id .id sqrt square #> 1: 1 4 4 16 #> 2: 2 5 5 25 #> 3: 3 6 6 36 #> 4: 4 7 7 49 #> 5: 5 8 8 64

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/batchReduce.html ================================================ Reduce Operation for Batch Systems — batchReduce • batchtools

    A parallel and asynchronous Reduce for batch systems. Note that this function only defines the computational jobs. Each job reduces a certain number of elements on one slave. The actual computation is started with submitJobs. Results and partial results can be collected with reduceResultsList, reduceResults or loadResult.

    batchReduce(
      fun,
      xs,
      init = NULL,
      chunks = seq_along(xs),
      more.args = list(),
      reg = getDefaultRegistry()
    )

    Arguments

    fun

    [function(aggr, x, ...)]
    Function to reduce xs with.

    xs

    [vector]
    Vector to reduce.

    init

    [ANY]
    Initial object for reducing. See Reduce.

    chunks

    [integer(length(xs))]
    Group for each element of xs. Can be generated with chunk.

    more.args

    [list]
    A list of additional arguments passed to fun.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with ids of added jobs stored in column “job.id”.

    See also

    Examples

    batchtools:::example_push_temp(1) # define function to reduce on slave, we want to sum a vector tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    xs = 1:100 f = function(aggr, x) aggr + x # sum 5 numbers on each slave process, i.e. 20 jobs chunks = chunk(xs, chunk.size = 5) batchReduce(fun = f, 1:100, init = 0, chunks = chunks, reg = tmp)
    #> Adding 20 jobs ...
    submitJobs(reg = tmp)
    #> Submitting 20 jobs in 20 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    # now reduce one final time on master reduceResults(fun = function(aggr, job, res) f(aggr, res), reg = tmp)
    #> [1] 5050

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/batchtools-deprecated.html ================================================ Deprecated function in the batchtools package — batchtools-deprecated • batchtools

    The following functions have been deprecated:

    chunkIds: deprecated in favor of chunk, lpt and binpack

    Site built with pkgdown.

    ================================================ FILE: docs/reference/batchtools-package.html ================================================ batchtools: Tools for Computation on Batch Systems — batchtools-package • batchtools

    For bug reports and feature requests please use the tracker: https://github.com/mllg/batchtools.

    Package options

    batchtools.verbose

    Verbosity. Set to FALSE to suppress info messages and progress bars.

    batchtools.progress

    Progress bars. Set to FALSE to disable them.

    batchtools.timestamps

    Add time stamps to log output. Set to FALSE to disable them.

    Furthermore, you may enable a debug mode using the debugme package by setting the environment variable “DEBUGME” to “batchtools” before loading batchtools.

    See also

    Author

    Maintainer: Michel Lang michellang@gmail.com (ORCID)

    Authors:

    Other contributors:

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/btlapply.html ================================================ Synchronous Apply Functions — btlapply • batchtools

    This is a set of functions acting as counterparts to the popular sequential apply functions in base R: btlapply for lapply and btmapply for mapply.

    Internally, jobs are created using batchMap on the provided registry. If no registry is provided, a temporary registry (see argument file.dir of makeRegistry) and batchMap will be used. After all jobs are terminated (see waitForJobs), the results are collected and returned as a list.

    Note that these functions are only suitable for short and fail-safe operations on batch systems. If some jobs fail, you have to retrieve partial results from the registry directory yourself.

    btlapply(
      X,
      fun,
      ...,
      resources = list(),
      n.chunks = NULL,
      chunk.size = NULL,
      reg = makeRegistry(file.dir = NA)
    )
    
    btmapply(
      fun,
      ...,
      more.args = list(),
      simplify = FALSE,
      use.names = TRUE,
      resources = list(),
      n.chunks = NULL,
      chunk.size = NULL,
      reg = makeRegistry(file.dir = NA)
    )

    Arguments

    X

    [vector]
    Vector to apply over.

    fun

    [function]
    Function to apply.

    ...

    [ANY]
    Additional arguments passed to fun (btlapply) or vectors to map over (btmapply).

    resources

    [named list]
    Computational resources for the jobs to submit. The actual elements of this list (e.g. something like “walltime” or “nodes”) depend on your template file, exceptions are outlined in the section 'Resources'. Default settings for a system can be set in the configuration file by defining the named list default.resources. Note that these settings are merged by name, e.g. merging list(walltime = 300) into list(walltime = 400, memory = 512) will result in list(walltime = 300, memory = 512). Same holds for individual job resources passed as additional column of ids (c.f. section 'Resources').

    n.chunks

    [integer(1)]
    Passed to chunk before submitJobs.

    chunk.size

    [integer(1)]
    Passed to chunk before submitJobs.

    reg

    [Registry]
    Registry. If not explicitly passed, a temporary registry is created (see argument file.dir of makeRegistry).

    more.args

    [list]
    Additional arguments passed to fun.

    simplify

    [logical(1)]
    Simplify the results using simplify2array?

    use.names

    [logical(1)]
    Use names of the input to name the output?

    Value

    [list] List with the results of the function call.

    Examples

    batchtools:::example_push_temp(1) btlapply(1:3, function(x) x^2)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    #> Adding 3 jobs ...
    #> Submitting 3 jobs in 3 chunks using cluster functions 'Interactive' ...
    #> [[1]] #> [1] 1 #> #> [[2]] #> [1] 4 #> #> [[3]] #> [1] 9 #>
    btmapply(function(x, y, z) x + y + z, x = 1:3, y = 1:3, more.args = list(z = 1), simplify = TRUE)
    #> No readable configuration file found
    #> Created registry in '/tmp/RtmpsmIC4J/registry9e4369cc3bf0' using cluster functions 'Interactive'
    #> Adding 3 jobs ...
    #> Submitting 3 jobs in 3 chunks using cluster functions 'Interactive' ...
    #> [1] 3 5 7

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/cfBrewTemplate.html ================================================ Cluster Functions Helper to Write Job Description Files — cfBrewTemplate • batchtools

    This function is only intended for use in your own cluster functions implementation.

    Calls brew silently on your template, any error will lead to an exception. The file is stored at the same place as the corresponding job file in the “jobs”-subdir of your files directory.

    cfBrewTemplate(reg, text, jc)

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    text

    [character(1)]
    String ready to be brewed. See cfReadBrewTemplate to read a template from the file system.

    jc

    [JobCollection]
    Will be used as environment to brew the template file in. See JobCollection for a list of all available variables.

    Value

    [character(1)]. File path to brewed template file.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/cfHandleUnknownSubmitError.html ================================================ Cluster Functions Helper to Handle Unknown Errors — cfHandleUnknownSubmitError • batchtools

    This function is only intended for use in your own cluster functions implementation.

    Simply constructs a SubmitJobResult object with status code 101, NA as batch id and an informative error message containing the output of the OS command in output.

    cfHandleUnknownSubmitError(cmd, exit.code, output)

    Arguments

    cmd

    [character(1)]
    OS command used to submit the job, e.g. qsub.

    exit.code

    [integer(1)]
    Exit code of the OS command, should not be 0.

    output

    [character]
    Output of the OS command, hopefully an informative error message. If these are multiple lines in a vector, they are automatically joined.

    Value

    [SubmitJobResult].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/cfKillJob.html ================================================ Cluster Functions Helper to Kill Batch Jobs — cfKillJob • batchtools

    This function is only intended for use in your own cluster functions implementation.

    Calls the OS command to kill a job via system like this: “cmd batch.job.id”. If the command returns an exit code > 0, the command is repeated after a 1 second sleep max.tries-1 times. If the command failed in all tries, an error is generated.

    cfKillJob(
      reg,
      cmd,
      args = character(0L),
      max.tries = 3L,
      nodename = "localhost"
    )

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    cmd

    [character(1)]
    OS command, e.g. “qdel”.

    args

    [character]
    Arguments to cmd, including the batch id.

    max.tries

    [integer(1)]
    Total number of times to try to execute the OS command in case of failures. Default is 3.

    nodename

    [character(1)]
    Name of the SSH node to run the command on. If set to “localhost” (default), the command is not piped through SSH.

    Value

    TRUE on success. An exception is raised otherwise.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/cfReadBrewTemplate.html ================================================ Cluster Functions Helper to Parse a Brew Template — cfReadBrewTemplate • batchtools

    This function is only intended for use in your own cluster functions implementation.

    Simply reads your template file and returns it as a character vector.

    cfReadBrewTemplate(template, comment.string = NA_character_)

    Arguments

    template

    [character(1)]
    Path to template file which is then passed to brew.

    comment.string

    [character(1)]
    Ignore lines starting with this string.

    Value

    [character].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/chunk.html ================================================ Chunk Jobs for Sequential Execution — chunk • batchtools

    Jobs can be partitioned into “chunks” to be executed sequentially on the computational nodes. Chunks are defined by providing a data frame with columns “job.id” and “chunk” (integer) to submitJobs. All jobs with the same chunk number will be grouped together on one node to form a single computational job.

    The function chunk simply splits x into either a fixed number of groups, or into a variable number of groups with a fixed number of maximum elements.

    The function lpt also groups x into a fixed number of chunks, but uses the actual values of x in a greedy “Longest Processing Time” algorithm. As a result, the maximum sum of elements is minimized.

    binpack splits x into a variable number of groups whose sum of elements does not exceed the upper limit provided by chunk.size.

    See examples of estimateRuntimes for an application of binpack and lpt.

    chunk(x, n.chunks = NULL, chunk.size = NULL, shuffle = TRUE)
    
    lpt(x, n.chunks = 1L)
    
    binpack(x, chunk.size = max(x))

    Arguments

    x

    [numeric]
    For chunk an atomic vector (usually the job.id). For binpack and lpt, the weights to group.

    n.chunks

    [integer(1)]
    Requested number of chunks. The function chunk distributes the number of elements in x evenly while lpt tries to even out the sum of elements in each chunk. If more chunks than necessary are requested, empty chunks are ignored. Mutually exclusive with chunk.size.

    chunk.size

    [integer(1)]
    Requested chunk size for each single chunk. For chunk this is the number of elements in x, for binpack the size is determined by the sum of values in x. Mutually exclusive with n.chunks.

    shuffle

    [logical(1)]
    Shuffles the groups. Default is TRUE.

    Value

    [integer] giving the chunk number for each element of x.

    See also

    Examples

    batchtools:::example_push_temp(2) ch = chunk(1:10, n.chunks = 2) table(ch)
    #> ch #> 1 2 #> 5 5
    ch = chunk(rep(1, 10), chunk.size = 2) table(ch)
    #> ch #> 1 2 3 4 5 #> 2 2 2 2 2
    set.seed(1) x = runif(10) ch = lpt(x, n.chunks = 2) sapply(split(x, ch), sum)
    #> 1 2 #> 2.808393 2.706746
    set.seed(1) x = runif(10) ch = binpack(x, 1) sapply(split(x, ch), sum)
    #> 1 2 3 4 5 6 #> 0.9446753 0.9699941 0.8983897 0.9263065 0.8307960 0.9449773
    # Job chunking tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg1' using cluster functions 'Interactive'
    ids = batchMap(identity, 1:25, reg = tmp)
    #> Adding 25 jobs ...
    ### Group into chunks with 10 jobs each library(data.table) ids[, chunk := chunk(job.id, chunk.size = 10)]
    #> job.id chunk #> 1: 1 3 #> 2: 2 1 #> 3: 3 1 #> 4: 4 2 #> 5: 5 3 #> 6: 6 1 #> 7: 7 3 #> 8: 8 3 #> 9: 9 2 #> 10: 10 1 #> 11: 11 1 #> 12: 12 2 #> 13: 13 2 #> 14: 14 1 #> 15: 15 2 #> 16: 16 1 #> 17: 17 3 #> 18: 18 1 #> 19: 19 2 #> 20: 20 1 #> 21: 21 2 #> 22: 22 3 #> 23: 23 2 #> 24: 24 3 #> 25: 25 3 #> job.id chunk
    print(ids[, .N, by = chunk])
    #> chunk N #> 1: 3 8 #> 2: 1 9 #> 3: 2 8
    ### Group into 4 chunks ids[, chunk := chunk(job.id, n.chunks = 4)]
    #> job.id chunk #> 1: 1 2 #> 2: 2 3 #> 3: 3 4 #> 4: 4 3 #> 5: 5 4 #> 6: 6 1 #> 7: 7 4 #> 8: 8 1 #> 9: 9 2 #> 10: 10 2 #> 11: 11 3 #> 12: 12 3 #> 13: 13 4 #> 14: 14 1 #> 15: 15 3 #> 16: 16 2 #> 17: 17 1 #> 18: 18 2 #> 19: 19 3 #> 20: 20 4 #> 21: 21 1 #> 22: 22 2 #> 23: 23 4 #> 24: 24 1 #> 25: 25 1 #> job.id chunk
    print(ids[, .N, by = chunk])
    #> chunk N #> 1: 2 6 #> 2: 3 6 #> 3: 4 6 #> 4: 1 7
    ### Submit to batch system submitJobs(ids = ids, reg = tmp)
    #> Submitting 25 jobs in 4 chunks using cluster functions 'Interactive' ...
    # Grouped chunking tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg2' using cluster functions 'Interactive'
    prob = addProblem(reg = tmp, "prob1", data = iris, fun = function(job, data) nrow(data))
    #> Adding problem 'prob1'
    prob = addProblem(reg = tmp, "prob2", data = Titanic, fun = function(job, data) nrow(data))
    #> Adding problem 'prob2'
    algo = addAlgorithm(reg = tmp, "algo", fun = function(job, data, instance, i, ...) problem)
    #> Adding algorithm 'algo'
    prob.designs = list(prob1 = data.table(), prob2 = data.table(x = 1:2)) algo.designs = list(algo = data.table(i = 1:3)) addExperiments(prob.designs, algo.designs, repls = 3, reg = tmp)
    #> Adding 9 experiments ('prob1'[1] x 'algo'[3] x repls[3]) ...
    #> Adding 18 experiments ('prob2'[2] x 'algo'[3] x repls[3]) ...
    ### Group into chunks of 5 jobs, but do not put multiple problems into the same chunk # -> only one problem has to be loaded per chunk, and only once because it is cached ids = getJobTable(reg = tmp)[, .(job.id, problem, algorithm)] ids[, chunk := chunk(job.id, chunk.size = 5), by = "problem"]
    #> job.id problem algorithm chunk #> 1: 1 prob1 algo 1 #> 2: 2 prob1 algo 1 #> 3: 3 prob1 algo 2 #> 4: 4 prob1 algo 2 #> 5: 5 prob1 algo 1 #> 6: 6 prob1 algo 2 #> 7: 7 prob1 algo 1 #> 8: 8 prob1 algo 1 #> 9: 9 prob1 algo 2 #> 10: 10 prob2 algo 2 #> 11: 11 prob2 algo 1 #> 12: 12 prob2 algo 1 #> 13: 13 prob2 algo 3 #> 14: 14 prob2 algo 3 #> 15: 15 prob2 algo 3 #> 16: 16 prob2 algo 2 #> 17: 17 prob2 algo 2 #> 18: 18 prob2 algo 2 #> 19: 19 prob2 algo 2 #> 20: 20 prob2 algo 4 #> 21: 21 prob2 algo 1 #> 22: 22 prob2 algo 1 #> 23: 23 prob2 algo 3 #> 24: 24 prob2 algo 4 #> 25: 25 prob2 algo 1 #> 26: 26 prob2 algo 4 #> 27: 27 prob2 algo 4 #> job.id problem algorithm chunk
    ids[, chunk := .GRP, by = c("problem", "chunk")]
    #> job.id problem algorithm chunk #> 1: 1 prob1 algo 1 #> 2: 2 prob1 algo 1 #> 3: 3 prob1 algo 2 #> 4: 4 prob1 algo 2 #> 5: 5 prob1 algo 1 #> 6: 6 prob1 algo 2 #> 7: 7 prob1 algo 1 #> 8: 8 prob1 algo 1 #> 9: 9 prob1 algo 2 #> 10: 10 prob2 algo 3 #> 11: 11 prob2 algo 4 #> 12: 12 prob2 algo 4 #> 13: 13 prob2 algo 5 #> 14: 14 prob2 algo 5 #> 15: 15 prob2 algo 5 #> 16: 16 prob2 algo 3 #> 17: 17 prob2 algo 3 #> 18: 18 prob2 algo 3 #> 19: 19 prob2 algo 3 #> 20: 20 prob2 algo 6 #> 21: 21 prob2 algo 4 #> 22: 22 prob2 algo 4 #> 23: 23 prob2 algo 5 #> 24: 24 prob2 algo 6 #> 25: 25 prob2 algo 4 #> 26: 26 prob2 algo 6 #> 27: 27 prob2 algo 6 #> job.id problem algorithm chunk
    dcast(ids, chunk ~ problem)
    #> Using 'chunk' as value column. Use 'value.var' to override
    #> Aggregate function missing, defaulting to 'length'
    #> chunk prob1 prob2 #> 1: 1 5 0 #> 2: 2 4 0 #> 3: 3 0 5 #> 4: 4 0 5 #> 5: 5 0 4 #> 6: 6 0 4

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/chunkIds.html ================================================ Chunk Jobs for Sequential Execution — chunkIds • batchtools

    This function is deprecated in favor of the more flexible chunk, lpt and binpack.

    chunkIds(ids = NULL, n.chunks = NULL, chunk.size = NULL,
      group.by = character(0L), reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    n.chunks

    [integer(1)]
    Requested number of chunks. The function chunk distributes the number of elements in x evenly while lpt tries to even out the sum of elements in each chunk. If more chunks than necessary are requested, empty chunks are ignored. Mutually exclusive with chunk.size.

    chunk.size

    [integer(1)]
    Requested chunk size for each single chunk. For chunk this is the number of elements in x, for binpack the size is determined by the sum of values in x. Mutually exclusive with n.chunks.

    group.by

    [character(0)]
    If ids is a data.frame with additional columns (in addition to the required column “job.id”), then the chunking is performed using subgroups defined by the columns set in group.by. See example.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with columns “job.id” and “chunk”.

    See also

    Site built with pkgdown.

    ================================================ FILE: docs/reference/clearRegistry.html ================================================ Remove All Jobs — clearRegistry • batchtools

    Removes all jobs from a registry and calls sweepRegistry.

    clearRegistry(reg = getDefaultRegistry())

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/doJobCollection.html ================================================ Execute Jobs of a JobCollection — doJobCollection • batchtools

    Executes every job in a JobCollection. This function is intended to be called on the slave.

    doJobCollection(jc, output = NULL)

    Arguments

    jc

    [JobCollection]
    Either an object of class “JobCollection” as returned by makeJobCollection or a string with the path to a file containing a “JobCollection” as RDS file (as stored by submitJobs).

    output

    [character(1)]
    Path to a file to write the output to. Defaults to NULL which means that output is written to the active sink. Do not set this if your scheduler redirects output to a log file.

    Value

    [character(1)]: Hash of the JobCollection executed.

    See also

    Other JobCollection: makeJobCollection()

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(identity, 1:2, reg = tmp)
    #> Adding 2 jobs ...
    jc = makeJobCollection(1:2, reg = tmp) doJobCollection(jc)
    #> ### [bt]: This is batchtools v0.9.14 #> ### [bt]: Starting calculation of 2 jobs #> ### [bt]: Setting working directory to '/home/michel/Projekte/batchtools/docs/reference' #> ### [bt]: Memory measurement disabled #> ### [bt]: Starting job [batchtools job.id=1] #> ### [bt]: Setting seed to 1166 ... #> #> ### [bt]: Job terminated successfully [batchtools job.id=1] #> ### [bt]: Starting job [batchtools job.id=2] #> ### [bt]: Setting seed to 1167 ... #> #> ### [bt]: Job terminated successfully [batchtools job.id=2] #> ### [bt]: Calculation finished!

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/estimateRuntimes.html ================================================ Estimate Remaining Runtimes — estimateRuntimes • batchtools

    Estimates the runtimes of jobs using the random forest implemented in ranger. Observed runtimes are retrieved from the Registry and runtimes are predicted for unfinished jobs.

    The estimated remaining time is calculated in the print method. You may also pass n here to determine the number of parallel jobs which is then used in a simple Longest Processing Time (LPT) algorithm to give an estimate for the parallel runtime.

    estimateRuntimes(tab, ..., reg = getDefaultRegistry())
    
    # S3 method for RuntimeEstimate
    print(x, n = 1L, ...)

    Arguments

    tab

    [data.table]
    Table with column “job.id” and additional columns to predict the runtime. Observed runtimes will be looked up in the registry and serve as dependent variable. All columns in tab except “job.id” will be passed to ranger as independent variables to fit the model.

    ...

    [ANY]
    Additional parameters passed to ranger. Ignored for the print method.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    x

    [RuntimeEstimate]
    Object to print.

    n

    [integer(1)]
    Number of parallel jobs to assume for runtime estimation.

    Value

    [RuntimeEstimate] which is a list with two named elements: “runtimes” is a data.table with columns “job.id”, “runtime” (in seconds) and “type” (“estimated” if runtime is estimated, “observed” if runtime was observed). The other element of the list, named “model”, contains the fitted random forest object.

    See also

    binpack and lpt to chunk jobs according to their estimated runtimes.

    Examples

    batchtools:::example_push_temp(1) # Create a simple toy registry set.seed(1) tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE, seed = 1)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    addProblem(name = "iris", data = iris, fun = function(data, ...) nrow(data), reg = tmp)
    #> Adding problem 'iris'
    addAlgorithm(name = "nrow", function(instance, ...) nrow(instance), reg = tmp)
    #> Adding algorithm 'nrow'
    addAlgorithm(name = "ncol", function(instance, ...) ncol(instance), reg = tmp)
    #> Adding algorithm 'ncol'
    addExperiments(algo.designs = list(nrow = data.table::CJ(x = 1:50, y = letters[1:5])), reg = tmp)
    #> Adding 250 experiments ('iris'[1] x 'nrow'[250] x repls[1]) ...
    addExperiments(algo.designs = list(ncol = data.table::CJ(x = 1:50, y = letters[1:5])), reg = tmp)
    #> Adding 250 experiments ('iris'[1] x 'ncol'[250] x repls[1]) ...
    # We use the job parameters to predict runtimes tab = unwrap(getJobPars(reg = tmp)) # First we need to submit some jobs so that the forest can train on some data. # Thus, we just sample some jobs from the registry while grouping by factor variables. library(data.table) ids = tab[, .SD[sample(nrow(.SD), 5)], by = c("problem", "algorithm", "y")] setkeyv(ids, "job.id") submitJobs(ids, reg = tmp)
    #> Submitting 50 jobs in 50 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    # We "simulate" some more realistic runtimes here to demonstrate the functionality: # - Algorithm "ncol" is 5 times more expensive than "nrow" # - x has no effect on the runtime # - If y is "a" or "b", the runtimes are really high runtime = function(algorithm, x, y) { ifelse(algorithm == "nrow", 100L, 500L) + 1000L * (y %in% letters[1:2]) } tmp$status[ids, done := done + tab[ids, runtime(algorithm, x, y)]]
    #> job.id def.id submitted started done error mem.used resource.id #> 1: 1 1 NA NA NA <NA> NA NA #> 2: 2 2 NA NA NA <NA> NA NA #> 3: 3 3 NA NA NA <NA> NA NA #> 4: 4 4 NA NA NA <NA> NA NA #> 5: 5 5 NA NA NA <NA> NA NA #> --- #> 496: 496 496 NA NA NA <NA> NA NA #> 497: 497 497 NA NA NA <NA> NA NA #> 498: 498 498 NA NA NA <NA> NA NA #> 499: 499 499 1603265964 1603265964 1603266464 <NA> NA 1 #> 500: 500 500 NA NA NA <NA> NA NA #> batch.id log.file job.hash job.name repl #> 1: <NA> <NA> <NA> <NA> 1 #> 2: <NA> <NA> <NA> <NA> 1 #> 3: <NA> <NA> <NA> <NA> 1 #> 4: <NA> <NA> <NA> <NA> 1 #> 5: <NA> <NA> <NA> <NA> 1 #> --- #> 496: <NA> <NA> <NA> <NA> 1 #> 497: <NA> <NA> <NA> <NA> 1 #> 498: <NA> <NA> <NA> <NA> 1 #> 499: cfInteractive <NA> joba17949c0e00e62405c8465e973297f1c <NA> 1 #> 500: <NA> <NA> <NA> <NA> 1
    rjoin(sjoin(tab, ids), getJobStatus(ids, reg = tmp)[, c("job.id", "time.running")])
    #> job.id problem algorithm x y time.running #> 1: 32 iris nrow 7 b 1100.0026 secs #> 2: 42 iris nrow 9 b 1100.0024 secs #> 3: 47 iris nrow 10 b 1100.0023 secs #> 4: 66 iris nrow 14 a 1100.0052 secs #> 5: 73 iris nrow 15 c 100.0023 secs #> 6: 75 iris nrow 15 e 100.0024 secs #> 7: 86 iris nrow 18 a 1100.0025 secs #> 8: 100 iris nrow 20 e 100.0026 secs #> 9: 101 iris nrow 21 a 1100.0024 secs #> 10: 103 iris nrow 21 c 100.0024 secs #> 11: 123 iris nrow 25 c 100.0024 secs #> 12: 125 iris nrow 25 e 100.0028 secs #> 13: 161 iris nrow 33 a 1100.0026 secs #> 14: 165 iris nrow 33 e 100.0026 secs #> 15: 169 iris nrow 34 d 100.0026 secs #> 16: 183 iris nrow 37 c 100.0027 secs #> 17: 184 iris nrow 37 d 100.0027 secs #> 18: 203 iris nrow 41 c 100.0036 secs #> 19: 207 iris nrow 42 b 1100.0024 secs #> 20: 209 iris nrow 42 d 100.0029 secs #> 21: 220 iris nrow 44 e 100.0023 secs #> 22: 227 iris nrow 46 b 1100.0024 secs #> 23: 229 iris nrow 46 d 100.0023 secs #> 24: 231 iris nrow 47 a 1100.0023 secs #> 25: 244 iris nrow 49 d 100.0022 secs #> 26: 260 iris ncol 2 e 500.0024 secs #> 27: 276 iris ncol 6 a 1500.0025 secs #> 28: 278 iris ncol 6 c 500.0025 secs #> 29: 279 iris ncol 6 d 500.0024 secs #> 30: 296 iris ncol 10 a 1500.0025 secs #> 31: 320 iris ncol 14 e 500.0023 secs #> 32: 340 iris ncol 18 e 500.0023 secs #> 33: 347 iris ncol 20 b 1500.0023 secs #> 34: 363 iris ncol 23 c 500.0023 secs #> 35: 369 iris ncol 24 d 500.0023 secs #> 36: 373 iris ncol 25 c 500.0025 secs #> 37: 387 iris ncol 28 b 1500.0023 secs #> 38: 410 iris ncol 32 e 500.0024 secs #> 39: 421 iris ncol 35 a 1500.0024 secs #> 40: 436 iris ncol 38 a 1500.0024 secs #> 41: 444 iris ncol 39 d 500.0022 secs #> 42: 448 iris ncol 40 c 500.0022 secs #> 43: 456 iris ncol 42 a 1500.0023 secs #> 44: 459 iris ncol 42 d 500.0023 secs #> 45: 467 iris ncol 44 b 1500.0023 secs #> 46: 468 iris ncol 44 c 500.0023 secs #> 47: 475 iris ncol 45 e 500.0024 secs #> 48: 482 iris ncol 47 b 1500.0023 secs #> 49: 492 iris ncol 49 b 
1500.0023 secs #> 50: 499 iris ncol 50 d 500.0023 secs #> job.id problem algorithm x y time.running
    # Estimate runtimes: est = estimateRuntimes(tab, reg = tmp) print(est)
    #> Runtime Estimate for 500 jobs with 1 CPUs #> Done : 0d 09h 43m 20.1s #> Remaining: 3d 17h 37m 8.0s #> Total : 4d 03h 20m 28.1s
    rjoin(tab, est$runtimes)
    #> job.id problem algorithm x y type runtime #> 1: 1 iris nrow 1 a estimated 1107.0568 #> 2: 2 iris nrow 1 b estimated 1090.8508 #> 3: 3 iris nrow 1 c estimated 338.2092 #> 4: 4 iris nrow 1 d estimated 318.6349 #> 5: 5 iris nrow 1 e estimated 317.3189 #> --- #> 496: 496 iris ncol 50 a estimated 1381.9162 #> 497: 497 iris ncol 50 b estimated 1389.1659 #> 498: 498 iris ncol 50 c estimated 614.0596 #> 499: 499 iris ncol 50 d observed 500.0023 #> 500: 500 iris ncol 50 e estimated 574.7851
    print(est, n = 10)
    #> Runtime Estimate for 500 jobs with 10 CPUs #> Done : 0d 09h 43m 20.1s #> Remaining: 3d 17h 37m 8.0s #> Parallel : 0d 08h 58m 21.4s #> Total : 4d 03h 20m 28.1s
    # Submit jobs with longest runtime first: ids = est$runtimes[type == "estimated"][order(runtime, decreasing = TRUE)] print(ids)
    #> job.id type runtime #> 1: 466 estimated 1420.0934 #> 2: 461 estimated 1418.7001 #> 3: 462 estimated 1415.5134 #> 4: 457 estimated 1414.7134 #> 5: 487 estimated 1413.4847 #> --- #> 446: 194 estimated 133.0456 #> 447: 185 estimated 133.0030 #> 448: 204 estimated 131.6954 #> 449: 174 estimated 131.5901 #> 450: 179 estimated 130.4434
    if (FALSE) { submitJobs(ids, reg = tmp) } # Group jobs into chunks with runtime < 1h ids = est$runtimes[type == "estimated"] ids[, chunk := binpack(runtime, 3600)]
    #> job.id type runtime chunk #> 1: 1 estimated 1107.0568 47 #> 2: 2 estimated 1090.8508 51 #> 3: 3 estimated 338.2092 37 #> 4: 4 estimated 318.6349 33 #> 5: 5 estimated 317.3189 70 #> --- #> 446: 495 estimated 581.7197 17 #> 447: 496 estimated 1381.9162 20 #> 448: 497 estimated 1389.1659 15 #> 449: 498 estimated 614.0596 4 #> 450: 500 estimated 574.7851 26
    print(ids)
    #> job.id type runtime chunk #> 1: 1 estimated 1107.0568 47 #> 2: 2 estimated 1090.8508 51 #> 3: 3 estimated 338.2092 37 #> 4: 4 estimated 318.6349 33 #> 5: 5 estimated 317.3189 70 #> --- #> 446: 495 estimated 581.7197 17 #> 447: 496 estimated 1381.9162 20 #> 448: 497 estimated 1389.1659 15 #> 449: 498 estimated 614.0596 4 #> 450: 500 estimated 574.7851 26
    print(ids[, list(runtime = sum(runtime)), by = chunk])
    #> chunk runtime #> 1: 47 3493.187 #> 2: 51 3593.783 #> 3: 37 3598.573 #> 4: 33 3599.900 #> 5: 70 3493.489 #> 6: 53 3598.723 #> 7: 71 3491.366 #> 8: 48 3491.841 #> 9: 52 3597.483 #> 10: 54 3587.877 #> 11: 68 3499.779 #> 12: 72 3489.223 #> 13: 55 3583.526 #> 14: 69 3496.272 #> 15: 73 3483.829 #> 16: 46 3519.591 #> 17: 50 3599.943 #> 18: 38 3597.396 #> 19: 65 3512.646 #> 20: 43 3571.763 #> 21: 62 3522.617 #> 22: 66 3511.003 #> 23: 39 3599.908 #> 24: 35 3599.575 #> 25: 61 3533.407 #> 26: 40 3598.645 #> 27: 56 3571.361 #> 28: 57 3565.133 #> 29: 49 3481.931 #> 30: 42 3583.160 #> 31: 58 3555.775 #> 32: 60 3535.954 #> 33: 41 3588.180 #> 34: 36 3599.425 #> 35: 59 3545.174 #> 36: 44 3541.279 #> 37: 34 3599.586 #> 38: 64 3514.492 #> 39: 45 3540.479 #> 40: 63 3517.610 #> 41: 67 3507.819 #> 42: 27 3598.911 #> 43: 24 3599.823 #> 44: 25 3590.607 #> 45: 26 3598.511 #> 46: 23 3599.593 #> 47: 28 3573.496 #> 48: 75 3599.916 #> 49: 12 3559.937 #> 50: 74 3474.824 #> 51: 8 3593.188 #> 52: 20 3521.159 #> 53: 31 3599.784 #> 54: 7 3595.855 #> 55: 5 3594.254 #> 56: 11 3563.352 #> 57: 10 3575.839 #> 58: 6 3599.450 #> 59: 32 3598.576 #> 60: 80 3492.129 #> 61: 82 3471.066 #> 62: 83 3599.780 #> 63: 79 3501.372 #> 64: 76 3593.842 #> 65: 85 3588.259 #> 66: 89 3553.760 #> 67: 91 2151.522 #> 68: 81 3481.753 #> 69: 78 3513.014 #> 70: 87 3570.795 #> 71: 88 3563.106 #> 72: 77 3529.443 #> 73: 3 3599.295 #> 74: 86 3578.904 #> 75: 90 3529.605 #> 76: 2 3599.210 #> 77: 84 3596.381 #> 78: 1 3599.788 #> 79: 4 3595.377 #> 80: 9 3583.777 #> 81: 29 3558.408 #> 82: 18 3572.866 #> 83: 15 3583.955 #> 84: 21 3599.004 #> 85: 19 3567.117 #> 86: 16 3582.283 #> 87: 30 3550.130 #> 88: 17 3578.532 #> 89: 22 3599.427 #> 90: 13 3599.265 #> 91: 14 3595.019 #> chunk runtime
    if (FALSE) { submitJobs(ids, reg = tmp) } # Group jobs into 10 chunks with similar runtime ids = est$runtimes[type == "estimated"] ids[, chunk := lpt(runtime, 10)]
    #> job.id type runtime chunk #> 1: 1 estimated 1107.0568 4 #> 2: 2 estimated 1090.8508 9 #> 3: 3 estimated 338.2092 4 #> 4: 4 estimated 318.6349 8 #> 5: 5 estimated 317.3189 6 #> --- #> 446: 495 estimated 581.7197 2 #> 447: 496 estimated 1381.9162 9 #> 448: 497 estimated 1389.1659 2 #> 449: 498 estimated 614.0596 2 #> 450: 500 estimated 574.7851 1
    print(ids[, list(runtime = sum(runtime)), by = chunk])
    #> chunk runtime #> 1: 4 32227.40 #> 2: 9 32226.68 #> 3: 8 32231.22 #> 4: 6 32293.22 #> 5: 1 32226.47 #> 6: 3 32292.92 #> 7: 10 32227.16 #> 8: 5 32301.32 #> 9: 2 32301.36 #> 10: 7 32300.22

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/execJob.html ================================================ Execute a Single Job — execJob • batchtools

    Executes a single job (as created by makeJob) and returns its result. Also works for Experiments.

    execJob(job)

    Arguments

    job

    [Job | Experiment]
    Job/Experiment to execute.

    Value

    Result of the job.

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(identity, 1:2, reg = tmp)
    #> Adding 2 jobs ...
    job = makeJob(1, reg = tmp) execJob(job)
    #> ### [bt]: Setting seed to 12825 ...
    #> [1] 1

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/findConfFile.html ================================================ Find a batchtools Configuration File — findConfFile • batchtools

    This function returns the path to the first configuration file found in the following locations:

    1. File “batchtools.conf.R” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. File “batchtools.conf.R” in the current working directory.

    3. File “config.R” in the user configuration directory as reported by rappdirs::user_config_dir("batchtools", expand = FALSE) (depending on OS, e.g., on linux this usually resolves to “~/.config/batchtools/config.R”).

    4. “.batchtools.conf.R” in the home directory (“~”).

    5. “config.R” in the site config directory as reported by rappdirs::site_config_dir("batchtools") (depending on OS). This file can be used for admins to set sane defaults for a computation site.

    findConfFile()

    Value

    [character(1)] Path to the configuration file or NA if no configuration file was found.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/findJobs.html ================================================ Find and Filter Jobs — findJobs • batchtools

    These functions are used to find and filter jobs, depending on either their parameters (findJobs and findExperiments), their tags (findTagged), or their computational status (all other functions, see getStatus for an overview).

    Note that findQueued, findRunning, findOnSystem and findExpired are somewhat heuristic and may report misleading results, depending on the state of the system and the ClusterFunctions implementation.

    See JoinTables for convenient set operations (unions, intersects, differences) on tables with job ids.

    findJobs(expr, ids = NULL, reg = getDefaultRegistry())
    
    findExperiments(
      ids = NULL,
      prob.name = NA_character_,
      prob.pattern = NA_character_,
      algo.name = NA_character_,
      algo.pattern = NA_character_,
      prob.pars,
      algo.pars,
      repls = NULL,
      reg = getDefaultRegistry()
    )
    
    findSubmitted(ids = NULL, reg = getDefaultRegistry())
    
    findNotSubmitted(ids = NULL, reg = getDefaultRegistry())
    
    findStarted(ids = NULL, reg = getDefaultRegistry())
    
    findNotStarted(ids = NULL, reg = getDefaultRegistry())
    
    findDone(ids = NULL, reg = getDefaultRegistry())
    
    findNotDone(ids = NULL, reg = getDefaultRegistry())
    
    findErrors(ids = NULL, reg = getDefaultRegistry())
    
    findOnSystem(ids = NULL, reg = getDefaultRegistry())
    
    findRunning(ids = NULL, reg = getDefaultRegistry())
    
    findQueued(ids = NULL, reg = getDefaultRegistry())
    
    findExpired(ids = NULL, reg = getDefaultRegistry())
    
    findTagged(tags = character(0L), ids = NULL, reg = getDefaultRegistry())

    Arguments

    expr

    [expression]
    Predicate expression evaluated in the job parameters. Jobs for which expr evaluates to TRUE are returned.

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    prob.name

    [character]
    Exact name of the problem (no substring matching). If not provided, all problems are matched.

    prob.pattern

    [character]
    Regular expression pattern to match problem names. If not provided, all problems are matched.

    algo.name

    [character]
    Exact name of the algorithm (no substring matching). If not provided, all algorithms are matched.

    algo.pattern

    [character]
    Regular expression pattern to match algorithm names. If not provided, all algorithms are matched.

    prob.pars

    [expression]
    Predicate expression evaluated in the problem parameters.

    algo.pars

    [expression]
    Predicate expression evaluated in the algorithm parameters.

    repls

    [integer]
    Whitelist of replication numbers. If not provided, all replications are matched.

    tags

    [character]
    Return jobs which are tagged with any of the tags provided.

    Value

    [data.table] with column “job.id” containing matched jobs.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(identity, i = 1:3, reg = tmp)
    #> Adding 3 jobs ...
    ids = findNotSubmitted(reg = tmp) # get all jobs: findJobs(reg = tmp)
    #> job.id #> 1: 1 #> 2: 2 #> 3: 3
    # filter for jobs with parameter i >= 2 findJobs(i >= 2, reg = tmp) # filter on the computational status findSubmitted(reg = tmp)
    #> Empty data.table (0 rows and 1 cols): job.id
    findNotDone(reg = tmp)
    #> job.id #> 1: 1 #> 2: 2 #> 3: 3
    # filter on tags addJobTags(2:3, "my_tag", reg = tmp) findTagged(tags = "my_tag", reg = tmp)
    #> job.id #> 1: 2 #> 2: 3
    # combine filter functions using joins # -> jobs which are not done and not tagged (using an anti-join): ajoin(findNotDone(reg = tmp), findTagged("my_tag", reg = tmp))
    #> job.id #> 1: 1

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/findTemplateFile.html ================================================ Find a batchtools Template File — findTemplateFile • batchtools

    This function returns the path to a template file on the file system.

    findTemplateFile(template)

    Arguments

    template

    [character(1)]
    Either a path to a brew template file (with extension “tmpl”), or a short descriptive name enabling the following heuristic for the file lookup:

    1. “batchtools.[template].tmpl” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. “batchtools.[template].tmpl” in the current working directory.

    3. “[template].tmpl” in the user config directory (see user_config_dir); on linux this is usually “~/.config/batchtools/[template].tmpl”.

    4. “.batchtools.[template].tmpl” in the home directory.

    5. “[template].tmpl” in the package installation directory in the subfolder “templates”.

    Value

    [character] Path to the file or NA if no template file was found.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/getDefaultRegistry.html ================================================ Get and Set the Default Registry — getDefaultRegistry • batchtools

    getDefaultRegistry returns the registry currently set as default (or stops with an exception if none is set). setDefaultRegistry sets a registry as default.

    getDefaultRegistry()
    
    setDefaultRegistry(reg)

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/getErrorMessages.html ================================================ Retrieve Error Messages — getErrorMessages • batchtools

    Extracts error messages from the internal database and returns them in a table.

    getErrorMessages(
      ids = NULL,
      missing.as.error = FALSE,
      reg = getDefaultRegistry()
    )

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findErrors. Invalid ids are ignored.

    missing.as.error

    [logical(1)]
    Treat missing results as errors? If TRUE, the error message “[not terminated]” is imputed for jobs which have not terminated. Default is FALSE.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with columns “job.id”, “terminated” (logical), “error” (logical) and “message” (string).

    See also

    Other debug: getStatus(), grepLogs(), killJobs(), resetJobs(), showLog(), testJob()

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    fun = function(i) if (i == 3) stop(i) else i ids = batchMap(fun, i = 1:5, reg = tmp)
    #> Adding 5 jobs ...
    submitJobs(1:4, reg = tmp)
    #> Submitting 4 jobs in 4 chunks using cluster functions 'Interactive' ...
    #> Error in (function (i) : 3
    waitForJobs(1:4, reg = tmp)
    #> [1] FALSE
    getErrorMessages(ids, reg = tmp)
    #> job.id terminated error message #> 1: 1 TRUE FALSE <NA> #> 2: 2 TRUE FALSE <NA> #> 3: 3 TRUE TRUE Error in (function (i) : 3 #> 4: 4 TRUE FALSE <NA> #> 5: 5 FALSE FALSE <NA>
    getErrorMessages(ids, missing.as.error = TRUE, reg = tmp)
    #> job.id terminated error message #> 1: 1 TRUE FALSE <NA> #> 2: 2 TRUE FALSE <NA> #> 3: 3 TRUE TRUE Error in (function (i) : 3 #> 4: 4 TRUE FALSE <NA> #> 5: 5 FALSE TRUE [not terminated]

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/getJobTable.html ================================================ Query Job Information — getJobTable • batchtools

    getJobStatus returns the internal table which stores information about the computational status of jobs, getJobPars a table with the job parameters, getJobResources a table with the resources which were set to submit the jobs, and getJobTags the tags of the jobs (see Tags).

    getJobTable returns all these tables joined.

    getJobTable(ids = NULL, reg = getDefaultRegistry())
    
    getJobStatus(ids = NULL, reg = getDefaultRegistry())
    
    getJobResources(ids = NULL, reg = getDefaultRegistry())
    
    getJobPars(ids = NULL, reg = getDefaultRegistry())
    
    getJobTags(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with the following columns (not necessarily in this order):

    job.id

    Unique Job ID as integer.

    submitted

    Time the job was submitted to the batch system as POSIXct.

    started

    Time the job was started on the batch system as POSIXct.

    done

    Time the job terminated (successfully or with an error) as POSIXct.

    error

    Either NA if the job terminated successfully or the error message.

    mem.used

    Estimate of the memory usage.

    batch.id

    Batch ID as reported by the scheduler.

    log.file

    Log file. If missing, defaults to [job.hash].log.

    job.hash

    Unique string identifying the job or chunk.

    time.queued

    Time in seconds (as difftime) the job was queued.

    time.running

    Time in seconds (as difftime) the job was running.

    pars

    List of parameters/arguments for this job.

    resources

    List of computational resources set for this job.

    tags

    Tags as joined string, delimited by “,”.

    problem

    Only for ExperimentRegistry: the problem identifier.

    algorithm

    Only for ExperimentRegistry: the algorithm identifier.

    Examples

    batchtools:::example_push_temp(1)
    tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    f = function(x) if (x < 0) stop("x must be > 0") else sqrt(x)
    batchMap(f, x = c(-1, 0, 1), reg = tmp)
    #> Adding 3 jobs ...
    submitJobs(reg = tmp)
    #> Submitting 3 jobs in 3 chunks using cluster functions 'Interactive' ...
    #> Error in (function (x) : x must be > 0
    waitForJobs(reg = tmp)
    #> [1] FALSE
    addJobTags(1:2, "tag1", reg = tmp)
    addJobTags(2, "tag2", reg = tmp)
    # Complete table:
    getJobTable(reg = tmp)
    #> job.id submitted started done #> 1: 1 2020-10-21 09:39:25 2020-10-21 09:39:25 2020-10-21 09:39:25 #> 2: 2 2020-10-21 09:39:25 2020-10-21 09:39:25 2020-10-21 09:39:25 #> 3: 3 2020-10-21 09:39:25 2020-10-21 09:39:25 2020-10-21 09:39:25 #> error mem.used batch.id log.file #> 1: Error in (function (x) : x must be > 0 NA cfInteractive <NA> #> 2: <NA> NA cfInteractive <NA> #> 3: <NA> NA cfInteractive <NA> #> job.hash job.name time.queued #> 1: job5adb5742954ec70e5dc4621612e5638e <NA> 0.002799988 secs #> 2: job5c0144440e7359c3570442e591fd68a7 <NA> 0.002799988 secs #> 3: jobcedfc453688181879b03f66b5e6e5f25 <NA> 0.002799988 secs #> time.running job.pars resources tags #> 1: 0.002099991 secs <list[1]> <list[0]> tag1 #> 2: 0.001900196 secs <list[1]> <list[0]> tag1,tag2 #> 3: 0.001899958 secs <list[1]> <list[0]> <NA>
    # Job parameters:
    getJobPars(reg = tmp)
    #> job.id job.pars #> 1: 1 <list[1]> #> 2: 2 <list[1]> #> 3: 3 <list[1]>
    # Set and retrieve tags:
    getJobTags(reg = tmp)
    #> job.id tags #> 1: 1 tag1 #> 2: 2 tag1,tag2 #> 3: 3 <NA>
    # Job parameters with tags right-joined:
    rjoin(getJobPars(reg = tmp), getJobTags(reg = tmp))
    #> job.id job.pars tags #> 1: 1 <list[1]> tag1 #> 2: 2 <list[1]> tag1,tag2 #> 3: 3 <list[1]> <NA>

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/getStatus.html ================================================ Summarize the Computational Status — getStatus • batchtools

    This function gives an encompassing overview over the computational status on your system. The status can be one or many of the following:

    • “defined”: Jobs which are defined via batchMap or addExperiments, but are not yet submitted.

    • “submitted”: Jobs which are submitted to the batch system via submitJobs, scheduled for execution.

    • “started”: Jobs which have been started.

    • “done”: Jobs which terminated successfully.

    • “error”: Jobs which terminated with an exception.

    • “running”: Jobs which are listed by the cluster functions to be running on the live system. Not supported for all cluster functions.

    • “queued”: Jobs which are listed by the cluster functions to be queued on the live system. Not supported for all cluster functions.

    • “system”: Jobs which are listed by the cluster functions to be queued or running. Not supported for all cluster functions.

    • “expired”: Jobs which have been submitted, but vanished from the live system. Note that this is determined heuristically and may include some false positives.

    Here, a job which terminated successfully counts towards the jobs which are submitted, started and done. To retrieve the corresponding job ids, see findJobs.

    getStatus(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] (with class “Status” for printing).

    See also

    Examples

    batchtools:::example_push_temp(1)
    tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    fun = function(i) if (i == 3) stop(i) else i
    ids = batchMap(fun, i = 1:5, reg = tmp)
    #> Adding 5 jobs ...
    submitJobs(ids = 1:4, reg = tmp)
    #> Submitting 4 jobs in 4 chunks using cluster functions 'Interactive' ...
    #> Error in (function (i) : 3
    waitForJobs(reg = tmp)
    #> [1] FALSE
    tab = getStatus(reg = tmp)
    print(tab)
    #> Status for 5 jobs at 2020-10-21 09:39:26: #> Submitted : 4 ( 80.0%) #> -- Queued : 0 ( 0.0%) #> -- Started : 4 ( 80.0%) #> ---- Running : 0 ( 0.0%) #> ---- Done : 3 ( 60.0%) #> ---- Error : 1 ( 20.0%) #> ---- Expired : 0 ( 0.0%)
    str(tab)
    #> Classes ‘Status’, ‘data.table’ and 'data.frame': 1 obs. of 9 variables: #> $ defined : int 5 #> $ submitted: int 4 #> $ started : int 4 #> $ done : int 3 #> $ error : int 1 #> $ queued : int 0 #> $ running : int 0 #> $ expired : int 0 #> $ system : int 0 #> - attr(*, ".internal.selfref")=<externalptr>

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/grepLogs.html ================================================ Grep Log Files for a Pattern — grepLogs • batchtools

    Crawls through log files and reports jobs with lines matching the pattern. See showLog for an example.

    grepLogs(
      ids = NULL,
      pattern,
      ignore.case = FALSE,
      fixed = FALSE,
      reg = getDefaultRegistry()
    )

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findStarted. Invalid ids are ignored.

    pattern

    [character(1L)]
    Regular expression or string (see fixed).

    ignore.case

    [logical(1L)]
    If TRUE the match will be performed case insensitively.

    fixed

    [logical(1L)]
    If FALSE (default), pattern is a regular expression and a fixed string otherwise.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with columns “job.id” and “message”.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/index.html ================================================ Function reference • batchtools

    Overview

    batchtools-package

    batchtools: Tools for Computation on Batch Systems

    Registry

    makeRegistry()

    Registry Constructor

    makeExperimentRegistry()

    ExperimentRegistry Constructor

    assertRegistry()

    assertRegistry

    loadRegistry()

    Load a Registry from the File System

    saveRegistry()

    Store the Registry to the File System

    syncRegistry()

    Synchronize the Registry

    sweepRegistry()

    Check Consistency and Remove Obsolete Information

    removeRegistry()

    Remove a Registry from the File System

    getDefaultRegistry() setDefaultRegistry()

    Get and Set the Default Registry

    Define Jobs

    batchMap()

    Map Operation for Batch Systems

    batchReduce()

    Reduce Operation for Batch Systems

    batchMapResults()

    Map Over Results to Create New Jobs

    addProblem() removeProblems()

    Define Problems for Experiments

    addAlgorithm() removeAlgorithms()

    Define Algorithms for Experiments

    addExperiments()

    Add Experiments to the Registry

    Submit Jobs

    submitJobs()

    Submit Jobs to the Batch Systems

    batchExport()

    Export Objects to the Slaves

    waitForJobs()

    Wait for Termination of Jobs

    chunk() lpt() binpack()

    Chunk Jobs for Sequential Execution

    setJobNames() getJobNames()

    Set and Retrieve Job Names

    Query Job Information

    getStatus()

    Summarize the Computational Status

    findJobs() findExperiments() findSubmitted() findNotSubmitted() findStarted() findNotStarted() findDone() findNotDone() findErrors() findOnSystem() findRunning() findQueued() findExpired() findTagged()

    Find and Filter Jobs

    getJobTable() getJobStatus() getJobResources() getJobPars() getJobTags()

    Query Job Information

    summarizeExperiments()

    Quick Summary over Experiments

    Retrieve Results

    reduceResults()

    Reduce Results

    reduceResultsList() reduceResultsDataTable()

    Apply Functions on Results

    loadResult()

    Load the Result of a Single Job

    unwrap() flatten()

    Unwrap Nested Data Frames

    Debugging

    resetJobs()

    Reset the Computational State of Jobs

    testJob()

    Run Jobs Interactively

    showLog() getLog()

    Inspect Log Files

    getErrorMessages()

    Retrieve Error Messages

    grepLogs()

    Grep Log Files for a Pattern

    Remove Jobs

    killJobs()

    Kill Jobs

    clearRegistry()

    Remove All Jobs

    removeExperiments()

    Remove Experiments

    Additional objects

    makeJob()

    Jobs and Experiments

    makeJobCollection()

    JobCollection Constructor

    Cluster Functions

    cfKillJob()

    Cluster Functions Helper to Kill Batch Jobs

    cfBrewTemplate()

    Cluster Functions Helper to Write Job Description Files

    cfReadBrewTemplate()

    Cluster Functions Helper to Parse a Brew Template

    cfHandleUnknownSubmitError()

    Cluster Functions Helper to Handle Unknown Errors

    makeClusterFunctions()

    ClusterFunctions Constructor

    makeClusterFunctionsDocker()

    ClusterFunctions for Docker

    makeClusterFunctionsInteractive()

    ClusterFunctions for Sequential Execution in the Running R Session

    makeClusterFunctionsLSF()

    ClusterFunctions for LSF Systems

    makeClusterFunctionsMulticore()

    ClusterFunctions for Parallel Multicore Execution

    makeClusterFunctionsOpenLava()

    ClusterFunctions for OpenLava

    makeClusterFunctionsSGE()

    ClusterFunctions for SGE Systems

    makeClusterFunctionsSSH()

    ClusterFunctions for Remote SSH Execution

    makeClusterFunctionsSlurm()

    ClusterFunctions for Slurm Systems

    makeClusterFunctionsSocket()

    ClusterFunctions for Parallel Socket Execution

    makeClusterFunctionsTORQUE()

    ClusterFunctions for OpenPBS/TORQUE Systems

    makeSubmitJobResult()

    Create a SubmitJobResult

    runHook()

    Trigger Evaluation of Custom Function

    Worker

    Create a Linux-Worker

    Miscellaneous

    addJobTags() removeJobTags() getUsedJobTags()

    Add or Remove Job Tags

    btlapply() btmapply()

    Synchronous Apply Functions

    ijoin() ljoin() rjoin() ojoin() sjoin() ajoin() ujoin()

    Inner, Left, Right, Outer, Semi and Anti Join for Data Tables

    runOSCommand()

    Run OS Commands on Local or Remote Machines

    execJob()

    Execute a Single Job

    doJobCollection()

    Execute Jobs of a JobCollection

    estimateRuntimes() print(<RuntimeEstimate>)

    Estimate Remaining Runtimes

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/killJobs.html ================================================ Kill Jobs — killJobs • batchtools

    Kill jobs which are currently running on the batch system.

    In case of an error when killing, the function tries - after a short sleep - to kill the remaining batch jobs again. If this fails three times for some jobs, the function gives up. Jobs that could be successfully killed are reset in the Registry.

    killJobs(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findOnSystem. Invalid ids are ignored.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with columns “job.id”, the corresponding “batch.id” and the logical flag “killed” indicating success.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/loadRegistry.html ================================================ Load a Registry from the File System — loadRegistry • batchtools

    Loads a registry from its file.dir.

    Multiple R sessions accessing the same registry simultaneously can lead to database inconsistencies. This is especially dangerous if the same file.dir is accessed from multiple machines, e.g. via a mount.

    If you just need to check on the status or peek into some preliminary results while another process is still submitting or waiting for pending results, you can load the registry in a read-only mode. All operations that need to change the registry will raise an exception in this mode. Files communicated back by the computational nodes are parsed to update the registry in memory while the registry on the file system remains unchanged.

    A heuristic tries to detect if the registry has been altered in the background by another process and in this case automatically restricts the current registry to read-only mode. However, you should not rely on this heuristic to work flawlessly. Thus, set writeable to TRUE if and only if you are absolutely sure that other state-changing processes are terminated.

    If you need write access, load the registry with writeable set to TRUE.

    loadRegistry(
      file.dir,
      work.dir = NULL,
      conf.file = findConfFile(),
      make.default = TRUE,
      writeable = FALSE
    )

    Arguments

    file.dir

    [character(1)]
    Path where all files of the registry are saved. Default is directory “registry” in the current working directory. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with “~”). Note that some templates do not handle relative paths well.

    If you pass NA, a temporary directory will be used. This way, you can create disposable registries for btlapply or examples. By default, the temporary directory tempdir() will be used. If you want to use another directory, e.g. a directory which is shared between nodes, you can set it in your configuration file by setting the variable temp.dir.

    work.dir

    [character(1)]
    Working directory for R process for running jobs. Defaults to the working directory currently set during Registry construction (see getwd). loadRegistry uses the stored work.dir, but you may also explicitly overwrite it, e.g., after switching to another system.

    The provided path will get normalized unless it is given relative to the home directory (i.e., starting with “~”). Note that some templates do not handle relative paths well.

    conf.file

    [character(1)]
    Path to a configuration file which is sourced while the registry is created. In the configuration file you can define how batchtools interacts with the system via ClusterFunctions. Separating the configuration of the underlying host system from the R code allows you to easily move computation to another site.

    The file lookup is implemented in the internal (but exported) function findConfFile which returns the first file found of the following candidates:

    1. File “batchtools.conf.R” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. File “batchtools.conf.R” in the current working directory.

    3. File “config.R” in the user configuration directory as reported by rappdirs::user_config_dir("batchtools", expand = FALSE) (depending on OS, e.g., on linux this usually resolves to “~/.config/batchtools/config.R”).

    4. “.batchtools.conf.R” in the home directory (“~”).

    5. “config.R” in the site config directory as reported by rappdirs::site_config_dir("batchtools") (depending on OS). This file can be used for admins to set sane defaults for a computation site.

    Set to NA if you want to suppress reading any configuration file. If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. Therefore you can set and overwrite slots, e.g. default.resources = list(walltime = 3600) to set default resources or “max.concurrent.jobs” to limit the number of jobs allowed to run simultaneously on the system.

    make.default

    [logical(1)]
    If set to TRUE, the created registry is saved inside the package namespace and acts as default registry. You might want to switch this off if you work with multiple registries simultaneously. Default is TRUE.

    writeable

    [logical(1)]
    Loads the registry in read-write mode. Default is FALSE.

    Value

    [Registry].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/loadResult.html ================================================ Load the Result of a Single Job — loadResult • batchtools

    Loads the result of a single job.

    loadResult(id, reg = getDefaultRegistry())

    Arguments

    id

    [integer(1) or data.table]
    Single integer to specify the job or a data.table with column job.id and exactly one row.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [ANY]. The stored result.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctions.html ================================================ ClusterFunctions Constructor — makeClusterFunctions • batchtools

    This is the constructor used to create custom cluster functions. Note that some standard implementations for TORQUE, Slurm, LSF, SGE, etc. ship with the package.

    makeClusterFunctions(
      name,
      submitJob,
      killJob = NULL,
      listJobsQueued = NULL,
      listJobsRunning = NULL,
      array.var = NA_character_,
      store.job.collection = FALSE,
      store.job.files = FALSE,
      scheduler.latency = 0,
      fs.latency = 0,
      hooks = list()
    )

    Arguments

    name

    [character(1)]
    Name of cluster functions.

    submitJob

    [function(reg, jc, ...)]
    Function to submit new jobs. Must return a SubmitJobResult object. The arguments are reg (Registry) and jc (JobCollection).

    killJob

    [function(reg, batch.id)]
    Function to kill a job on the batch system. Make sure that you definitely kill the job! Return value is currently ignored. Must have the arguments reg (Registry) and batch.id (character(1) as returned by submitJob). Note that there is a helper function cfKillJob to repeatedly try to kill jobs. Set killJob to NULL if killing jobs cannot be supported.

    listJobsQueued

    [function(reg)]
    List all queued jobs on the batch system for the current user. Must return a character vector of batch ids, in the same format as they are returned by submitJob. Set listJobsQueued to NULL if listing of queued jobs is not supported.

    listJobsRunning

    [function(reg)]
    List all running jobs on the batch system for the current user. Must return a character vector of batch ids, in the same format as they are returned by submitJob. It does not matter if you return a few job ids too many (e.g. all for the current user instead of all for the current registry), but you have to include all relevant ones. Must have the argument reg (Registry). Set listJobsRunning to NULL if listing of running jobs is not supported.

    array.var

    [character(1)]
    Name of the environment variable set by the scheduler to identify IDs of job arrays. Default is NA for no array support.

    store.job.collection

    [logical(1)]
    Flag to indicate that the cluster function implementation of submitJob can not directly handle JobCollection objects. If set to TRUE, the JobCollection is serialized to the file system before submitting the job. Default is FALSE.

    store.job.files

    [logical(1)]
    Flag to indicate that job files need to be stored in the file directory. If set to FALSE (default), the job file is created in a temporary directory, otherwise (or if the debug mode is enabled) in the subdirectory jobs of the file.dir.

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    hooks

    [list]
    Named list of functions which will be called on certain events like “pre.submit” or “post.sync”. See Hooks.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsDocker.html ================================================ ClusterFunctions for Docker — makeClusterFunctionsDocker • batchtools

    Cluster functions for Docker/Docker Swarm (https://docs.docker.com/engine/swarm/).

    The submitJob function executes docker [docker.args] run --detach=true [image.args] [resources] [image] [cmd]. Arguments docker.args, image.args and image can be set on construction. The resources part takes the named resources ncpus and memory from submitJobs and maps them to the arguments --cpu-shares and --memory (in Megabytes). The resource threads is mapped to the environment variables “OMP_NUM_THREADS” and “OPENBLAS_NUM_THREADS”. To reliably identify jobs in the swarm, jobs are labeled with “batchtools=[job.hash]” and named using the current login name (label “user”) and the job hash (label “batchtools”).

    listJobsRunning uses docker [docker.args] ps --format={{.ID}} to filter for running jobs.

    killJobs uses docker [docker.args] kill [batch.id] to kill running jobs.

    These cluster functions use a Hook to remove finished jobs before a new submit and every time the Registry is synchronized (using syncRegistry). This is currently required because docker does not remove terminated containers automatically. Use docker ps -a --filter 'label=batchtools' --filter 'status=exited' to identify and remove terminated containers manually (or use a cron job).

    makeClusterFunctionsDocker(
      image,
      docker.args = character(0L),
      image.args = character(0L),
      scheduler.latency = 1,
      fs.latency = 65
    )

    Arguments

    image

    [character(1)]
    Name of the docker image to run.

    docker.args

    [character]
    Additional arguments passed to “docker” *before* the command (“run”, “ps” or “kill”) to execute (e.g., the docker host).

    image.args

    [character]
    Additional arguments passed to “docker run” (e.g., to define mounts or environment variables).

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsInteractive.html ================================================ ClusterFunctions for Sequential Execution in the Running R Session — makeClusterFunctionsInteractive • batchtools

    All jobs are executed sequentially using the current R process in which submitJobs is called. Thus, submitJob blocks the session until the job has finished. The main use of this ClusterFunctions implementation is to test and debug programs on a local computer.

    Listing jobs returns an empty vector (as no jobs can be running when you call this) and killJob is not implemented for the same reasons.

    makeClusterFunctionsInteractive(
      external = FALSE,
      write.logs = TRUE,
      fs.latency = 0
    )

    Arguments

    external

    [logical(1)]
    If set to TRUE, jobs are started in a fresh R session instead of the currently active one, but the call still waits for their termination. Default is FALSE.

    write.logs

    [logical(1)]
    Sink the output to log files. Turning logging off can increase the speed of calculations but makes it very difficult to debug. Default is TRUE.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsLSF.html ================================================ ClusterFunctions for LSF Systems — makeClusterFunctionsLSF • batchtools

    Cluster functions for LSF (https://www.ibm.com/products/hpc-workload-management).

    Job files are created based on the brew template template. This file is processed with brew and then submitted to the queue using the bsub command. Jobs are killed using the bkill command and the list of running jobs is retrieved using bjobs -u $USER -w. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case).

    The template file can access all resources passed to submitJobs as well as all variables stored in the JobCollection. It is the template file's job to choose a queue for the job and handle the desired resource allocations.

    makeClusterFunctionsLSF(
      template = "lsf",
      scheduler.latency = 1,
      fs.latency = 65
    )

    Arguments

    template

    [character(1)]
    Either a path to a brew template file (with extension “tmpl”), or a short descriptive name enabling the following heuristic for the file lookup:

    1. “batchtools.[template].tmpl” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. “batchtools.[template].tmpl” in the current working directory.

    3. “[template].tmpl” in the user config directory (see user_config_dir); on linux this is usually “~/.config/batchtools/[template].tmpl”.

    4. “.batchtools.[template].tmpl” in the home directory.

    5. “[template].tmpl” in the package installation directory in the subfolder “templates”.

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    Note

    Array jobs are currently not supported.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsMulticore.html ================================================ ClusterFunctions for Parallel Multicore Execution — makeClusterFunctionsMulticore • batchtools

    Jobs are spawned asynchronously using the functions mcparallel and mccollect (both in parallel). Does not work on Windows, use makeClusterFunctionsSocket instead.

    makeClusterFunctionsMulticore(ncpus = NA_integer_, fs.latency = 0)

    Arguments

    ncpus

    [integer(1)]
    Number of CPUs. Default is to use all logical cores. The total number of cores "available" can be set via the option mc.cores and defaults to the heuristic implemented in detectCores.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsOpenLava.html ================================================ ClusterFunctions for OpenLava — makeClusterFunctionsOpenLava • batchtools

    Cluster functions for OpenLava.

    Job files are created based on the brew template template. This file is processed with brew and then submitted to the queue using the bsub command. Jobs are killed using the bkill command and the list of running jobs is retrieved using bjobs -u $USER -w. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case).

    The template file can access all resources passed to submitJobs as well as all variables stored in the JobCollection. It is the template file's job to choose a queue for the job and handle the desired resource allocations.

    makeClusterFunctionsOpenLava(
      template = "openlava",
      scheduler.latency = 1,
      fs.latency = 65
    )

    Arguments

    template

    [character(1)]
    Either a path to a brew template file (with extension “tmpl”), or a short descriptive name enabling the following heuristic for the file lookup:

    1. “batchtools.[template].tmpl” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. “batchtools.[template].tmpl” in the current working directory.

    3. “[template].tmpl” in the user config directory (see user_config_dir); on linux this is usually “~/.config/batchtools/[template].tmpl”.

    4. “.batchtools.[template].tmpl” in the home directory.

    5. “[template].tmpl” in the package installation directory in the subfolder “templates”.

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    Note

    Array jobs are currently not supported.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsSGE.html ================================================ ClusterFunctions for SGE Systems — makeClusterFunctionsSGE • batchtools

    Cluster functions for Univa Grid Engine / Oracle Grid Engine / Sun Grid Engine (https://www.univa.com/).

    Job files are created based on the brew template template. This file is processed with brew and then submitted to the queue using the qsub command. Jobs are killed using the qdel command and the list of running jobs is retrieved using qselect. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case).

    The template file can access all resources passed to submitJobs as well as all variables stored in the JobCollection. It is the template file's job to choose a queue for the job and handle the desired resource allocations.

    makeClusterFunctionsSGE(
      template = "sge",
      nodename = "localhost",
      scheduler.latency = 1,
      fs.latency = 65
    )

    Arguments

    template

    [character(1)]
    Either a path to a brew template file (with extension “tmpl”), or a short descriptive name enabling the following heuristic for the file lookup:

    1. “batchtools.[template].tmpl” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. “batchtools.[template].tmpl” in the current working directory.

    3. “[template].tmpl” in the user config directory (see user_config_dir); on linux this is usually “~/.config/batchtools/[template].tmpl”.

    4. “.batchtools.[template].tmpl” in the home directory.

    5. “[template].tmpl” in the package installation directory in the subfolder “templates”.

    nodename

    [character(1)]
    Nodename of the master host. All commands are sent via SSH to this host. Only works iff

    1. Passwordless authentication (e.g., via SSH public key authentication) is set up.

    2. The file directory is shared across machines, e.g. mounted via SSHFS.

    3. Either the absolute path to the file.dir is identical on the machines, or paths are provided relative to the home directory. Symbolic links should work.

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    Note

    Array jobs are currently not supported.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsSSH.html ================================================ ClusterFunctions for Remote SSH Execution — makeClusterFunctionsSSH • batchtools

    Jobs are spawned by starting multiple R sessions via Rscript over SSH. If the hostname of the Worker equals “localhost”, Rscript is called directly so that you do not need to have an SSH client installed.

    makeClusterFunctionsSSH(workers, fs.latency = 65)

    Arguments

    workers

    [list of Worker]
    List of Workers as constructed with Worker.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    Note

    If you use a custom “.ssh/config” file, make sure your ProxyCommand passes ‘-q’ to ssh, otherwise each output will end with the message “Killed by signal 1” and this will break the communication with the nodes.

    See also

    Examples

    if (FALSE) { # cluster functions for multicore execution on the local machine makeClusterFunctionsSSH(list(Worker$new("localhost", ncpus = 2))) }

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsSlurm.html ================================================ ClusterFunctions for Slurm Systems — makeClusterFunctionsSlurm • batchtools

    Cluster functions for Slurm (https://slurm.schedmd.com/).

    Job files are created based on the brew template template. This file is processed with brew and then submitted to the queue using the sbatch command. Jobs are killed using the scancel command and the list of running jobs is retrieved using squeue. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case).

    The template file can access all resources passed to submitJobs as well as all variables stored in the JobCollection. It is the template file's job to choose a queue for the job and handle the desired resource allocations.

    Note that you might have to specify the cluster name here if you do not want to use the default, otherwise the commands for listing and killing jobs will not work.

    makeClusterFunctionsSlurm(
      template = "slurm",
      array.jobs = TRUE,
      nodename = "localhost",
      scheduler.latency = 1,
      fs.latency = 65
    )

    Arguments

    template

    [character(1)]
    Either a path to a brew template file (with extension “tmpl”), or a short descriptive name enabling the following heuristic for the file lookup:

    1. “batchtools.[template].tmpl” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. “batchtools.[template].tmpl” in the current working directory.

    3. “[template].tmpl” in the user config directory (see user_config_dir); on linux this is usually “~/.config/batchtools/[template].tmpl”.

    4. “.batchtools.[template].tmpl” in the home directory.

    5. “[template].tmpl” in the package installation directory in the subfolder “templates”.

    array.jobs

    [logical(1)]
    If array jobs are disabled on the computing site, set to FALSE.

    nodename

    [character(1)]
    Nodename of the master host. All commands are sent via SSH to this host. Only works iff

    1. Passwordless authentication (e.g., via SSH public key authentication) is set up.

    2. The file directory is shared across machines, e.g. mounted via SSHFS.

    3. Either the absolute path to the file.dir is identical on the machines, or paths are provided relative to the home directory. Symbolic links should work.

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsSocket.html ================================================ ClusterFunctions for Parallel Socket Execution — makeClusterFunctionsSocket • batchtools

    Jobs are spawned asynchronously using the package snow.

    makeClusterFunctionsSocket(ncpus = NA_integer_, fs.latency = 65)

    Arguments

    ncpus

    [integer(1)]
    Number of CPUs. Default is to use all logical cores. The total number of cores "available" can be set via the option mc.cores and defaults to the heuristic implemented in detectCores.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeClusterFunctionsTORQUE.html ================================================ ClusterFunctions for OpenPBS/TORQUE Systems — makeClusterFunctionsTORQUE • batchtools

    Cluster functions for TORQUE/PBS (https://adaptivecomputing.com/cherry-services/torque-resource-manager/).

    Job files are created based on the brew template template. This file is processed with brew and then submitted to the queue using the qsub command. Jobs are killed using the qdel command and the list of running jobs is retrieved using qselect. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case).

    The template file can access all resources passed to submitJobs as well as all variables stored in the JobCollection. It is the template file's job to choose a queue for the job and handle the desired resource allocations.

    makeClusterFunctionsTORQUE(
      template = "torque",
      scheduler.latency = 1,
      fs.latency = 65
    )

    Arguments

    template

    [character(1)]
    Either a path to a brew template file (with extension “tmpl”), or a short descriptive name enabling the following heuristic for the file lookup:

    1. “batchtools.[template].tmpl” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. “batchtools.[template].tmpl” in the current working directory.

    3. “[template].tmpl” in the user config directory (see user_config_dir); on linux this is usually “~/.config/batchtools/[template].tmpl”.

    4. “.batchtools.[template].tmpl” in the home directory.

    5. “[template].tmpl” in the package installation directory in the subfolder “templates”.

    scheduler.latency

    [numeric(1)]
    Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling submitJobs.

    fs.latency

    [numeric(1)]
    Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to 0 to disable the heuristic, e.g. if you are working on a local file system.

    Value

    [ClusterFunctions].

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeExperimentRegistry.html ================================================ ExperimentRegistry Constructor — makeExperimentRegistry • batchtools

    makeExperimentRegistry constructs a special Registry which is suitable for the definition of large scale computer experiments.

    Each experiment consists of a Problem and an Algorithm. These can be parametrized with addExperiments to actually define computational jobs.

    makeExperimentRegistry(
      file.dir = "registry",
      work.dir = getwd(),
      conf.file = findConfFile(),
      packages = character(0L),
      namespaces = character(0L),
      source = character(0L),
      load = character(0L),
      seed = NULL,
      make.default = TRUE
    )

    Arguments

    file.dir

    [character(1)]
    Path where all files of the registry are saved. Default is directory “registry” in the current working directory. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with “~”). Note that some templates do not handle relative paths well.

    If you pass NA, a temporary directory will be used. This way, you can create disposable registries for btlapply or examples. By default, the temporary directory tempdir() will be used. If you want to use another directory, e.g. a directory which is shared between nodes, you can set it in your configuration file by setting the variable temp.dir.

    work.dir

    [character(1)]
    Working directory for R process for running jobs. Defaults to the working directory currently set during Registry construction (see getwd). loadRegistry uses the stored work.dir, but you may also explicitly overwrite it, e.g., after switching to another system.

    The provided path will get normalized unless it is given relative to the home directory (i.e., starting with “~”). Note that some templates do not handle relative paths well.

    conf.file

    [character(1)]
    Path to a configuration file which is sourced while the registry is created. In the configuration file you can define how batchtools interacts with the system via ClusterFunctions. Separating the configuration of the underlying host system from the R code allows you to easily move computation to another site.

    The file lookup is implemented in the internal (but exported) function findConfFile which returns the first file found of the following candidates:

    1. File “batchtools.conf.R” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. File “batchtools.conf.R” in the current working directory.

    3. File “config.R” in the user configuration directory as reported by rappdirs::user_config_dir("batchtools", expand = FALSE) (depending on OS, e.g., on linux this usually resolves to “~/.config/batchtools/config.R”).

    4. “.batchtools.conf.R” in the home directory (“~”).

    5. “config.R” in the site config directory as reported by rappdirs::site_config_dir("batchtools") (depending on OS). This file can be used for admins to set sane defaults for a computation site.

    Set to NA if you want to suppress reading any configuration file. If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. Therefore you can set and overwrite slots, e.g. default.resources = list(walltime = 3600) to set default resources or “max.concurrent.jobs” to limit the number of jobs allowed to run simultaneously on the system.

    packages

    [character]
    Packages that will always be loaded on each node. Uses require internally. Default is character(0).

    namespaces

    [character]
    Same as packages, but the packages will not be attached. Uses requireNamespace internally. Default is character(0).

    source

    [character]
    Files which should be sourced on the slaves prior to executing a job. Calls sys.source using the .GlobalEnv.

    load

    [character]
    Files which should be loaded on the slaves prior to executing a job. Calls load using the .GlobalEnv.

    seed

    [integer(1)]
    Start seed for jobs. Each job uses the (seed + job.id) as seed. Default is a random integer between 1 and 32768.

    make.default

    [logical(1)]
    If set to TRUE, the created registry is saved inside the package namespace and acts as default registry. You might want to switch this off if you work with multiple registries simultaneously. Default is TRUE.

    Value

    [ExperimentRegistry].

    Examples

    batchtools:::example_push_temp(1) tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    # Define one problem, two algorithms and add them with some parameters: addProblem(reg = tmp, "p1", fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd))
    #> Adding problem 'p1'
    addAlgorithm(reg = tmp, "a1", fun = function(job, data, instance, ...) mean(instance))
    #> Adding algorithm 'a1'
    addAlgorithm(reg = tmp, "a2", fun = function(job, data, instance, ...) median(instance))
    #> Adding algorithm 'a2'
    ids = addExperiments(reg = tmp, list(p1 = data.table::CJ(n = c(50, 100), mean = -2:2, sd = 1:4)))
    #> Adding 40 experiments ('p1'[40] x 'a1'[1] x repls[1]) ...
    #> Adding 40 experiments ('p1'[40] x 'a2'[1] x repls[1]) ...
    # Overview over defined experiments: tmp$problems
    #> [1] "p1"
    tmp$algorithms
    #> [1] "a1" "a2"
    #> problem algorithm .count #> 1: p1 a1 40 #> 2: p1 a2 40
    summarizeExperiments(reg = tmp, by = c("problem", "algorithm", "n"))
    #> problem algorithm n .count #> 1: p1 a1 50 20 #> 2: p1 a1 100 20 #> 3: p1 a2 50 20 #> 4: p1 a2 100 20
    ids = findExperiments(prob.pars = (n == 50), reg = tmp) print(unwrap(getJobPars(ids, reg = tmp)))
    #> job.id problem algorithm n mean sd #> 1: 1 p1 a1 50 -2 1 #> 2: 2 p1 a1 50 -2 2 #> 3: 3 p1 a1 50 -2 3 #> 4: 4 p1 a1 50 -2 4 #> 5: 5 p1 a1 50 -1 1 #> 6: 6 p1 a1 50 -1 2 #> 7: 7 p1 a1 50 -1 3 #> 8: 8 p1 a1 50 -1 4 #> 9: 9 p1 a1 50 0 1 #> 10: 10 p1 a1 50 0 2 #> 11: 11 p1 a1 50 0 3 #> 12: 12 p1 a1 50 0 4 #> 13: 13 p1 a1 50 1 1 #> 14: 14 p1 a1 50 1 2 #> 15: 15 p1 a1 50 1 3 #> 16: 16 p1 a1 50 1 4 #> 17: 17 p1 a1 50 2 1 #> 18: 18 p1 a1 50 2 2 #> 19: 19 p1 a1 50 2 3 #> 20: 20 p1 a1 50 2 4 #> 21: 41 p1 a2 50 -2 1 #> 22: 42 p1 a2 50 -2 2 #> 23: 43 p1 a2 50 -2 3 #> 24: 44 p1 a2 50 -2 4 #> 25: 45 p1 a2 50 -1 1 #> 26: 46 p1 a2 50 -1 2 #> 27: 47 p1 a2 50 -1 3 #> 28: 48 p1 a2 50 -1 4 #> 29: 49 p1 a2 50 0 1 #> 30: 50 p1 a2 50 0 2 #> 31: 51 p1 a2 50 0 3 #> 32: 52 p1 a2 50 0 4 #> 33: 53 p1 a2 50 1 1 #> 34: 54 p1 a2 50 1 2 #> 35: 55 p1 a2 50 1 3 #> 36: 56 p1 a2 50 1 4 #> 37: 57 p1 a2 50 2 1 #> 38: 58 p1 a2 50 2 2 #> 39: 59 p1 a2 50 2 3 #> 40: 60 p1 a2 50 2 4 #> job.id problem algorithm n mean sd
    # Submit jobs submitJobs(reg = tmp)
    #> Submitting 80 jobs in 80 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    # Reduce the results of algorithm a1 ids.mean = findExperiments(algo.name = "a1", reg = tmp) reduceResults(ids.mean, fun = function(aggr, res, ...) c(aggr, res), reg = tmp)
    #> [1] -2.00073766 -1.83988515 -2.19491590 -1.88744748 -1.16661898 -1.01586977 #> [7] -0.52913619 -0.63585094 -0.09331735 0.11880843 0.22120996 1.22939842 #> [13] 1.00632897 1.26918801 2.11616673 1.81935795 1.90243597 1.93950861 #> [19] 2.33621891 2.81313094 -1.95745534 -2.29431806 -1.82269005 -1.80889629 #> [25] -0.91933173 -1.03820621 -0.95531362 -1.49309041 0.09203207 0.05360571 #> [31] 0.04219920 -0.11534443 1.14488890 0.83504259 0.82821428 1.07835718 #> [37] 2.06274541 2.28296085 2.26426388 1.74159301
    # Join info table with all results and calculate mean of results # grouped by n and algorithm ids = findDone(reg = tmp) pars = unwrap(getJobPars(ids, reg = tmp)) results = unwrap(reduceResultsDataTable(ids, fun = function(res) list(res = res), reg = tmp)) tab = ljoin(pars, results) tab[, list(mres = mean(res)), by = c("n", "algorithm")]
    #> n algorithm mres #> 1: 50 a1 0.270398674 #> 2: 100 a1 0.001062847 #> 3: 50 a2 0.068935673 #> 4: 100 a2 0.020262158

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeRegistry.html ================================================ Registry Constructor — makeRegistry • batchtools

    makeRegistry constructs the inter-communication object for all functions in batchtools. All communication transactions are processed via the file system: All information required to run a job is stored as JobCollection in a file in a subdirectory of the file.dir directory. Each job stores its results as well as computational status information (start time, end time, error message, ...) also on the file system, which is regularly parsed and merged by the master using syncRegistry. After integrating the new information into the Registry, the Registry is serialized to the file system via saveRegistry. Both syncRegistry and saveRegistry are called whenever required internally. Therefore it should be safe to quit the R session at any time. Work can later be resumed by calling loadRegistry which de-serializes the registry from the file system.

    The registry created last is saved in the package namespace (unless make.default is set to FALSE) and can be retrieved via getDefaultRegistry.

    Canceled jobs and jobs submitted multiple times may leave stray files behind. These can be swept using sweepRegistry. clearRegistry completely erases all jobs from a registry, including log files and results, and thus allows you to start over.

    makeRegistry(
      file.dir = "registry",
      work.dir = getwd(),
      conf.file = findConfFile(),
      packages = character(0L),
      namespaces = character(0L),
      source = character(0L),
      load = character(0L),
      seed = NULL,
      make.default = TRUE
    )

    Arguments

    file.dir

    [character(1)]
    Path where all files of the registry are saved. Default is directory “registry” in the current working directory. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with “~”). Note that some templates do not handle relative paths well.

    If you pass NA, a temporary directory will be used. This way, you can create disposable registries for btlapply or examples. By default, the temporary directory tempdir() will be used. If you want to use another directory, e.g. a directory which is shared between nodes, you can set it in your configuration file by setting the variable temp.dir.

    work.dir

    [character(1)]
    Working directory for R process for running jobs. Defaults to the working directory currently set during Registry construction (see getwd). loadRegistry uses the stored work.dir, but you may also explicitly overwrite it, e.g., after switching to another system.

    The provided path will get normalized unless it is given relative to the home directory (i.e., starting with “~”). Note that some templates do not handle relative paths well.

    conf.file

    [character(1)]
    Path to a configuration file which is sourced while the registry is created. In the configuration file you can define how batchtools interacts with the system via ClusterFunctions. Separating the configuration of the underlying host system from the R code allows you to easily move computation to another site.

    The file lookup is implemented in the internal (but exported) function findConfFile which returns the first file found of the following candidates:

    1. File “batchtools.conf.R” in the path specified by the environment variable “R_BATCHTOOLS_SEARCH_PATH”.

    2. File “batchtools.conf.R” in the current working directory.

    3. File “config.R” in the user configuration directory as reported by rappdirs::user_config_dir("batchtools", expand = FALSE) (depending on OS, e.g., on linux this usually resolves to “~/.config/batchtools/config.R”).

    4. “.batchtools.conf.R” in the home directory (“~”).

    5. “config.R” in the site config directory as reported by rappdirs::site_config_dir("batchtools") (depending on OS). This file can be used for admins to set sane defaults for a computation site.

    Set to NA if you want to suppress reading any configuration file. If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. Therefore you can set and overwrite slots, e.g. default.resources = list(walltime = 3600) to set default resources or “max.concurrent.jobs” to limit the number of jobs allowed to run simultaneously on the system.

    packages

    [character]
    Packages that will always be loaded on each node. Uses require internally. Default is character(0).

    namespaces

    [character]
    Same as packages, but the packages will not be attached. Uses requireNamespace internally. Default is character(0).

    source

    [character]
    Files which should be sourced on the slaves prior to executing a job. Calls sys.source using the .GlobalEnv.

    load

    [character]
    Files which should be loaded on the slaves prior to executing a job. Calls load using the .GlobalEnv.

    seed

    [integer(1)]
    Start seed for jobs. Each job uses the (seed + job.id) as seed. Default is a random integer between 1 and 32768.

    make.default

    [logical(1)]
    If set to TRUE, the created registry is saved inside the package namespace and acts as default registry. You might want to switch this off if you work with multiple registries simultaneously. Default is TRUE.

    Value

    [environment] of class “Registry” with the following slots:

    file.dir [path]:

    File directory.

    work.dir [path]:

    Working directory.

    temp.dir [path]:

    Temporary directory. Used if file.dir is NA to create temporary registries.

    packages [character()]:

    Packages to load on the slaves.

    namespaces [character()]:

    Namespaces to load on the slaves.

    seed [integer(1)]:

    Registry seed. Before each job is executed, the seed seed + job.id is set.

    cluster.functions [cluster.functions]:

    Usually set in your conf.file. Set via a call to makeClusterFunctions. See example.

    default.resources [named list()]:

    Usually set in your conf.file. Named list of default resources.

    max.concurrent.jobs [integer(1)]:

    Usually set in your conf.file. Maximum number of concurrent jobs for a single user and current registry on the system. submitJobs will try to respect this setting. The resource “max.concurrent.jobs” has higher precedence.

    defs [data.table]:

    Table with job definitions (i.e. parameters).

    status [data.table]:

    Table holding information about the computational status. Also see getJobStatus.

    resources [data.table]:

    Table holding information about the computational resources used for the job. Also see getJobResources.

    tags [data.table]:

    Table holding information about tags. See Tags.

    hash [character(1)]:

    Unique hash which changes each time the registry gets saved to the file system. Can be utilized to invalidate the cache of knitr.

    Details

    Currently batchtools understands the following options set via the configuration file:

    cluster.functions:

    As returned by a constructor, e.g. makeClusterFunctionsSlurm.

    default.resources:

    List of resources to use. Will be overruled by resources specified via submitJobs.

    temp.dir:

    Path to directory to use for temporary registries.

    sleep:

    Custom sleep function. See waitForJobs.

    expire.after:

    Number of iterations before treating jobs as expired in waitForJobs.

    compress:

    Compression algorithm to use via saveRDS.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    print(tmp)
    #> Job Registry #> Backend : Interactive #> File dir : /tmp/batchtools-example/reg #> Work dir : /home/michel/Projekte/batchtools/docs/reference #> Jobs : 0 #> Seed : 5075 #> Writeable: TRUE
    # Set cluster functions to interactive mode and start jobs in external R sessions tmp$cluster.functions = makeClusterFunctionsInteractive(external = TRUE) # Change packages to load tmp$packages = c("MASS") saveRegistry(reg = tmp)
    #> [1] TRUE

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/makeSubmitJobResult.html ================================================ Create a SubmitJobResult — makeSubmitJobResult • batchtools

    This function is only intended for use in your own cluster functions implementation.

    Use this function in your implementation of makeClusterFunctions to create a return value for the submitJob function.

    makeSubmitJobResult(
      status,
      batch.id,
      log.file = NA_character_,
      msg = NA_character_
    )

    Arguments

    status

    [integer(1)]
    Launch status of job. 0 means success, codes between 1 and 100 are temporary errors and any error greater than 100 is a permanent failure.

    batch.id

    [character()]
    Unique id of this job on batch system, as given by the batch system. Must be globally unique so that the job can be terminated using just this information. For array jobs, this may be a vector of length equal to the number of jobs in the array.

    log.file

    [character()]
    Log file. If NA, defaults to [job.hash].log. Some cluster functions set this for array jobs.

    msg

    [character(1)]
    Optional error message in case status is not equal to 0. Default is “OK”, “TEMPERROR”, “ERROR”, depending on status.

    Value

    [SubmitJobResult]. A list, containing status, batch.id and msg.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/reduceResults.html ================================================ Reduce Results — reduceResults • batchtools

    A version of Reduce for Registry objects which iterates over finished jobs and aggregates them. All jobs must have terminated, an error is raised otherwise.

    reduceResults(fun, ids = NULL, init, ..., reg = getDefaultRegistry())

    Arguments

    fun

    [function]
    A function to reduce the results. The result of previous iterations (or the init) will be passed as first argument, the result of the i-th iteration as second. See Reduce for some examples. If the function has the formal argument “job”, the Job/Experiment is also passed to the function (named).

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findDone. Invalid ids are ignored.

    init

    [ANY]
    Initial element, as used in Reduce. If missing, the reduction uses the result of the first job as init and the reduction starts with the second job.

    ...

    [ANY]
    Additional arguments passed to function fun.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    Aggregated results in the same order as provided ids. Return type depends on the user function. If ids is empty, reduceResults returns init (if available) or NULL otherwise.

    Note

    If you have thousands of jobs, disabling the progress bar (options(batchtools.progress = FALSE)) can significantly increase the performance.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(function(a, b) list(sum = a+b, prod = a*b), a = 1:3, b = 1:3, reg = tmp)
    #> Adding 3 jobs ...
    submitJobs(reg = tmp)
    #> Submitting 3 jobs in 3 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    # Extract element sum from each result reduceResults(function(aggr, res) c(aggr, res$sum), init = list(), reg = tmp)
    #> [[1]] #> [1] 2 #> #> [[2]] #> [1] 4 #> #> [[3]] #> [1] 6 #>
    # Aggregate element sum via '+' reduceResults(function(aggr, res) aggr + res$sum, init = 0, reg = tmp)
    #> [1] 12
    # Aggregate element prod via '*' where parameter b < 3 reduce = function(aggr, res, job) { if (job$pars$b >= 3) return(aggr) aggr * res$prod } reduceResults(reduce, init = 1, reg = tmp)
    #> [1] 4
    # Reduce to data.frame() (inefficient, use reduceResultsDataTable() instead) reduceResults(rbind, init = data.frame(), reg = tmp)
    #> sum prod #> 1 2 1 #> 2 4 4 #> 3 6 9
    # Reduce to data.frame by collecting results first, then utilize vectorization of rbind: res = reduceResultsList(fun = as.data.frame, reg = tmp) do.call(rbind, res)
    #> sum prod #> 1 2 1 #> 2 4 4 #> 3 6 9
    # Reduce with custom combine function: comb = function(x, y) list(sum = x$sum + y$sum, prod = x$prod * y$prod) reduceResults(comb, reg = tmp)
    #> $sum #> [1] 12 #> #> $prod #> [1] 36 #>
    # The same with neutral element NULL comb = function(x, y) if (is.null(x)) y else list(sum = x$sum + y$sum, prod = x$prod * y$prod) reduceResults(comb, init = NULL, reg = tmp)
    #> $sum #> [1] 12 #> #> $prod #> [1] 36 #>
    # Alternative: Reduce in list, reduce manually in a 2nd step res = reduceResultsList(reg = tmp) Reduce(comb, res)
    #> $sum #> [1] 12 #> #> $prod #> [1] 36 #>

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/reduceResultsList.html ================================================ Apply Functions on Results — reduceResultsList • batchtools

    Applies a function on the results of your finished jobs and thereby collects them in a list or data.table. The latter requires the provided function to return a list (or data.frame) of scalar values. See rbindlist for features and limitations of the aggregation.

    If not all jobs are terminated, the respective result will be NULL.

    reduceResultsList(
      ids = NULL,
      fun = NULL,
      ...,
      missing.val,
      reg = getDefaultRegistry()
    )
    
    reduceResultsDataTable(
      ids = NULL,
      fun = NULL,
      ...,
      missing.val,
      reg = getDefaultRegistry()
    )

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findDone. Invalid ids are ignored.

    fun

    [function]
    Function to apply to each result. The result is passed unnamed as first argument. If NULL, the identity is used. If the function has the formal argument “job”, the Job/Experiment is also passed to the function.

    ...

    [ANY]
    Additional arguments passed to function fun.

    missing.val

    [ANY]
    Value to impute as result for a job which is not finished. If not provided and a result is missing, an exception is raised.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    reduceResultsList returns a list of the results in the same order as the provided ids. reduceResultsDataTable returns a data.table with columns “job.id” and additional result columns created via rbindlist, sorted by “job.id”.

    Note

    If you have thousands of jobs, disabling the progress bar (options(batchtools.progress = FALSE)) can significantly increase the performance.

    See also

    Examples

    batchtools:::example_push_temp(2) ### Example 1 - reduceResultsList tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg1' using cluster functions 'Interactive'
    batchMap(function(x) x^2, x = 1:10, reg = tmp)
    #> Adding 10 jobs ...
    submitJobs(reg = tmp)
    #> Submitting 10 jobs in 10 chunks using cluster functions 'Interactive' ...
    waitForJobs(reg = tmp)
    #> [1] TRUE
    reduceResultsList(fun = sqrt, reg = tmp)
    #> [[1]] #> [1] 1 #> #> [[2]] #> [1] 2 #> #> [[3]] #> [1] 3 #> #> [[4]] #> [1] 4 #> #> [[5]] #> [1] 5 #> #> [[6]] #> [1] 6 #> #> [[7]] #> [1] 7 #> #> [[8]] #> [1] 8 #> #> [[9]] #> [1] 9 #> #> [[10]] #> [1] 10 #>
    ### Example 2 - reduceResultsDataTable tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg2' using cluster functions 'Interactive'
    # add first problem fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd) addProblem("rnorm", fun = fun, reg = tmp)
    #> Adding problem 'rnorm'
    # add second problem fun = function(job, data, n, lambda, ...) rexp(n, rate = lambda) addProblem("rexp", fun = fun, reg = tmp)
    #> Adding problem 'rexp'
    # add first algorithm fun = function(instance, method, ...) if (method == "mean") mean(instance) else median(instance) addAlgorithm("average", fun = fun, reg = tmp)
    #> Adding algorithm 'average'
    # add second algorithm fun = function(instance, ...) sd(instance) addAlgorithm("deviation", fun = fun, reg = tmp)
    #> Adding algorithm 'deviation'
    # define problem and algorithm designs library(data.table) prob.designs = algo.designs = list() prob.designs$rnorm = CJ(n = 100, mean = -1:1, sd = 1:5) prob.designs$rexp = data.table(n = 100, lambda = 1:5) algo.designs$average = data.table(method = c("mean", "median")) algo.designs$deviation = data.table() # add experiments and submit addExperiments(prob.designs, algo.designs, reg = tmp)
    #> Adding 30 experiments ('rnorm'[15] x 'average'[2] x repls[1]) ...
    #> Adding 15 experiments ('rnorm'[15] x 'deviation'[1] x repls[1]) ...
    #> Adding 10 experiments ('rexp'[5] x 'average'[2] x repls[1]) ...
    #> Adding 5 experiments ('rexp'[5] x 'deviation'[1] x repls[1]) ...
    submitJobs(reg = tmp)
    #> Submitting 60 jobs in 60 chunks using cluster functions 'Interactive' ...
    # collect results and join them with problem and algorithm parameters res = ijoin( getJobPars(reg = tmp), reduceResultsDataTable(reg = tmp, fun = function(x) list(res = x)) ) unwrap(res, sep = ".")
    #> job.id problem algorithm prob.pars.n prob.pars.mean prob.pars.sd #> 1: 1 rnorm average 100 -1 1 #> 2: 2 rnorm average 100 -1 1 #> 3: 3 rnorm average 100 -1 2 #> 4: 4 rnorm average 100 -1 2 #> 5: 5 rnorm average 100 -1 3 #> 6: 6 rnorm average 100 -1 3 #> 7: 7 rnorm average 100 -1 4 #> 8: 8 rnorm average 100 -1 4 #> 9: 9 rnorm average 100 -1 5 #> 10: 10 rnorm average 100 -1 5 #> 11: 11 rnorm average 100 0 1 #> 12: 12 rnorm average 100 0 1 #> 13: 13 rnorm average 100 0 2 #> 14: 14 rnorm average 100 0 2 #> 15: 15 rnorm average 100 0 3 #> 16: 16 rnorm average 100 0 3 #> 17: 17 rnorm average 100 0 4 #> 18: 18 rnorm average 100 0 4 #> 19: 19 rnorm average 100 0 5 #> 20: 20 rnorm average 100 0 5 #> 21: 21 rnorm average 100 1 1 #> 22: 22 rnorm average 100 1 1 #> 23: 23 rnorm average 100 1 2 #> 24: 24 rnorm average 100 1 2 #> 25: 25 rnorm average 100 1 3 #> 26: 26 rnorm average 100 1 3 #> 27: 27 rnorm average 100 1 4 #> 28: 28 rnorm average 100 1 4 #> 29: 29 rnorm average 100 1 5 #> 30: 30 rnorm average 100 1 5 #> 31: 31 rnorm deviation 100 -1 1 #> 32: 32 rnorm deviation 100 -1 2 #> 33: 33 rnorm deviation 100 -1 3 #> 34: 34 rnorm deviation 100 -1 4 #> 35: 35 rnorm deviation 100 -1 5 #> 36: 36 rnorm deviation 100 0 1 #> 37: 37 rnorm deviation 100 0 2 #> 38: 38 rnorm deviation 100 0 3 #> 39: 39 rnorm deviation 100 0 4 #> 40: 40 rnorm deviation 100 0 5 #> 41: 41 rnorm deviation 100 1 1 #> 42: 42 rnorm deviation 100 1 2 #> 43: 43 rnorm deviation 100 1 3 #> 44: 44 rnorm deviation 100 1 4 #> 45: 45 rnorm deviation 100 1 5 #> 46: 46 rexp average 100 NA NA #> 47: 47 rexp average 100 NA NA #> 48: 48 rexp average 100 NA NA #> 49: 49 rexp average 100 NA NA #> 50: 50 rexp average 100 NA NA #> 51: 51 rexp average 100 NA NA #> 52: 52 rexp average 100 NA NA #> 53: 53 rexp average 100 NA NA #> 54: 54 rexp average 100 NA NA #> 55: 55 rexp average 100 NA NA #> 56: 56 rexp deviation 100 NA NA #> 57: 57 rexp deviation 100 NA NA #> 58: 58 rexp deviation 100 NA NA #> 59: 59 rexp deviation 
100 NA NA #> 60: 60 rexp deviation 100 NA NA #> job.id problem algorithm prob.pars.n prob.pars.mean prob.pars.sd #> prob.pars.lambda algo.pars.method result.res #> 1: NA mean -1.092018851 #> 2: NA median -0.863780644 #> 3: NA mean -1.084890423 #> 4: NA median -1.505171392 #> 5: NA mean -1.381319138 #> 6: NA median -1.341051423 #> 7: NA mean -1.188083630 #> 8: NA median 0.029939562 #> 9: NA mean -0.987419910 #> 10: NA median -1.673002281 #> 11: NA mean 0.007267359 #> 12: NA median 0.127401909 #> 13: NA mean -0.179617350 #> 14: NA median 0.046125736 #> 15: NA mean -0.057929853 #> 16: NA median 0.140366680 #> 17: NA mean -0.388849478 #> 18: NA median -1.207701427 #> 19: NA mean 0.042334184 #> 20: NA median -0.381653696 #> 21: NA mean 0.907293957 #> 22: NA median 1.033215601 #> 23: NA mean 1.020619322 #> 24: NA median 0.876104940 #> 25: NA mean 0.679306393 #> 26: NA median 0.977987955 #> 27: NA mean 0.675594915 #> 28: NA median 1.366615131 #> 29: NA mean 1.189950292 #> 30: NA median 0.263092706 #> 31: NA <NA> 1.040412625 #> 32: NA <NA> 2.392300899 #> 33: NA <NA> 3.042692900 #> 34: NA <NA> 4.263774294 #> 35: NA <NA> 5.296092785 #> 36: NA <NA> 1.071510659 #> 37: NA <NA> 2.072637870 #> 38: NA <NA> 3.153765268 #> 39: NA <NA> 3.419489935 #> 40: NA <NA> 5.558785921 #> 41: NA <NA> 1.012589238 #> 42: NA <NA> 2.015986064 #> 43: NA <NA> 3.170339048 #> 44: NA <NA> 4.281061044 #> 45: NA <NA> 4.988645174 #> 46: 1 mean 0.945622941 #> 47: 1 median 0.619859008 #> 48: 2 mean 0.446385905 #> 49: 2 median 0.420682183 #> 50: 3 mean 0.397613162 #> 51: 3 median 0.306781776 #> 52: 4 mean 0.277069910 #> 53: 4 median 0.165324176 #> 54: 5 mean 0.182106656 #> 55: 5 median 0.144752066 #> 56: 1 <NA> 0.942306695 #> 57: 2 <NA> 0.453601669 #> 58: 3 <NA> 0.292487926 #> 59: 4 <NA> 0.210416655 #> 60: 5 <NA> 0.189556888 #> prob.pars.lambda algo.pars.method result.res

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/removeExperiments.html ================================================ Remove Experiments — removeExperiments • batchtools

    Remove Experiments from an ExperimentRegistry. This function automatically checks if any of the jobs to remove is either pending or running. However, if the implemented heuristic fails, this can lead to inconsistencies in the database. Use with care while jobs are running.

    removeExperiments(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to no job. Invalid ids are ignored.

    reg

    [ExperimentRegistry]
    Registry. If not explicitly passed, uses the last created registry.

    Value

    [data.table] of removed job ids, invisibly.

    See also

    Other Experiment: addExperiments(), summarizeExperiments()

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/removeRegistry.html ================================================ Remove a Registry from the File System — removeRegistry • batchtools

    All files will be erased from the file system, including all results. If you wish to remove only intermediate files, use sweepRegistry.

    removeRegistry(wait = 5, reg = getDefaultRegistry())

    Arguments

    wait

    [numeric(1)]
    Seconds to wait before proceeding. This is a safety measure to not accidentally remove your precious files. Set to 0 in non-interactive scripts to disable this precaution.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [character(1)]: Path of the deleted file directory.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    removeRegistry(0, tmp)
    #> Recursively removing files in '/tmp/batchtools-example/reg' ...

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/resetJobs.html ================================================ Reset the Computational State of Jobs — resetJobs • batchtools

    Resets the computational state of jobs in the Registry. This function automatically checks if any of the jobs to reset is either pending or running. However, if the implemented heuristic fails, this can lead to inconsistencies in the database. Use with care while jobs are running.

    resetJobs(ids = NULL, reg = getDefaultRegistry())

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to no job. Invalid ids are ignored.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] of job ids which have been reset. See JoinTables for examples on working with job tables.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/runHook.html ================================================ Trigger Evaluation of Custom Function — runHook • batchtools

    Hooks allow triggering function calls on specific events. They can be specified via the ClusterFunctions and are triggered on the following events:

    pre.sync

    function(reg, fns, ...): Run before synchronizing the registry on the master. fns is the character vector of paths to the update files.

    post.sync

    function(reg, updates, ...): Run after synchronizing the registry on the master. updates is the data.table of processed updates.

    pre.submit.job

    function(reg, ...): Run before a job is submitted to the scheduler on the master.

    post.submit.job

    function(reg, ...): Run after a job is successfully submitted to the scheduler on the master.

    pre.submit

    function(reg, ...): Run before any job is submitted to the scheduler.

    post.submit

    function(reg, ...): Run after jobs are submitted to the scheduler.

    pre.do.collection

    function(reg, reader, ...): Run before starting the job collection on the slave. reader is an internal cache object.

    post.do.collection

    function(reg, updates, reader, ...): Run after all jobs in the chunk are terminated on the slave. updates is a data.table of updates which will be merged with the Registry by the master. reader is an internal cache object.

    pre.kill

    function(reg, ids, ...): Run before any job is killed.

    post.kill

    function(reg, ids, ...): Run after jobs are killed. ids is the return value of killJobs.

    runHook(obj, hook, ...)

    Arguments

    obj

    [Registry | JobCollection]
    Registry which contains the ClusterFunctions with element “hooks” or a JobCollection which holds the subset of functions which are executed remotely.

    hook

    [character(1)]
    ID of the hook as string.

    ...

    [ANY]
    Additional arguments passed to the function referenced by hook. See description.

    Value

    Return value of the called function, or NULL if there is no hook with the specified ID.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/runOSCommand.html ================================================ Run OS Commands on Local or Remote Machines — runOSCommand • batchtools

    This is a helper function to run arbitrary OS commands on local or remote machines. The interface is similar to system2, but it always returns the exit status and the output.

    runOSCommand(
      sys.cmd,
      sys.args = character(0L),
      stdin = "",
      nodename = "localhost"
    )

    Arguments

    sys.cmd

    [character(1)]
    Command to run.

    sys.args

    [character]
    Arguments for sys.cmd.

    stdin

    [character(1)]
    Argument passed to system2.

    nodename

    [character(1)]
    Name of the SSH node to run the command on. If set to “localhost” (default), the command is not piped through SSH.

    Value

    [named list] with “sys.cmd”, “sys.args”, “exit.code” (integer), “output” (character).

    See also

    Examples

    if (FALSE) { runOSCommand("ls") runOSCommand("ls", "-al") runOSCommand("notfound") }

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/saveRegistry.html ================================================ Store the Registry to the File System — saveRegistry • batchtools

    Stores the registry on the file system in its “file.dir” (specified for construction in makeRegistry, can be accessed via reg$file.dir). This function is usually called internally whenever needed.

    saveRegistry(reg = getDefaultRegistry())

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [logical(1)]: TRUE if the registry was saved, FALSE otherwise (if the registry is read-only).

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/showLog.html ================================================ Inspect Log Files — showLog • batchtools

    showLog opens the log in the pager. For customization, see file.show. getLog returns the log as character vector.

    showLog(id, reg = getDefaultRegistry())
    
    getLog(id, reg = getDefaultRegistry())

    Arguments

    id

    [integer(1) or data.table]
    Single integer to specify the job or a data.table with column job.id and exactly one row.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    Nothing.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    # Create some dummy jobs fun = function(i) { if (i == 3) stop(i) if (i %% 2 == 1) warning("That's odd.") } ids = batchMap(fun, i = 1:5, reg = tmp)
    #> Adding 5 jobs ...
    submitJobs(reg = tmp)
    #> Submitting 5 jobs in 5 chunks using cluster functions 'Interactive' ...
    #> Warning: That's odd.
    #> Error in (function (i) : 3
    #> Warning: That's odd.
    waitForJobs(reg = tmp)
    #> [1] FALSE
    getStatus(reg = tmp)
    #> Status for 5 jobs at 2020-10-21 09:39:31: #> Submitted : 5 (100.0%) #> -- Queued : 0 ( 0.0%) #> -- Started : 5 (100.0%) #> ---- Running : 0 ( 0.0%) #> ---- Done : 4 ( 80.0%) #> ---- Error : 1 ( 20.0%) #> ---- Expired : 0 ( 0.0%)
    writeLines(getLog(ids[1], reg = tmp))
    #> ### [bt]: This is batchtools v0.9.14 #> ### [bt]: Starting calculation of 1 jobs #> ### [bt]: Setting working directory to '/home/michel/Projekte/batchtools/docs/reference' #> ### [bt]: Memory measurement disabled #> ### [bt]: Starting job [batchtools job.id=1] #> ### [bt]: Setting seed to 5192 ... #> #> ### [bt]: Job terminated successfully [batchtools job.id=1] #> ### [bt]: Calculation finished!
    if (FALSE) { showLog(ids[1], reg = tmp) } grepLogs(pattern = "warning", ignore.case = TRUE, reg = tmp)
    #> Empty data.table (0 rows and 2 cols): job.id,matches

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/submitJobs.html ================================================ Submit Jobs to the Batch Systems — submitJobs • batchtools

    Submits defined jobs to the batch system.

    After submitting the jobs, you can use waitForJobs to wait for the termination of jobs or call reduceResultsList/reduceResults to collect partial results. The progress can be monitored with getStatus.

    submitJobs(
      ids = NULL,
      resources = list(),
      sleep = NULL,
      reg = getDefaultRegistry()
    )

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findNotSubmitted. Invalid ids are ignored.

    resources

    [named list]
    Computational resources for the jobs to submit. The actual elements of this list (e.g. something like “walltime” or “nodes”) depend on your template file, exceptions are outlined in the section 'Resources'. Default settings for a system can be set in the configuration file by defining the named list default.resources. Note that these settings are merged by name, e.g. merging list(walltime = 300) into list(walltime = 400, memory = 512) will result in list(walltime = 300, memory = 512). Same holds for individual job resources passed as additional column of ids (c.f. section 'Resources').

    sleep

    [function(i) | numeric(1)]
    Parameter to control the duration to sleep between temporary errors. You can pass an absolute numeric value in seconds or a function(i) which returns the number of seconds to sleep in the i-th iteration between temporary errors. If not provided (NULL), tries to read the value (number/function) from the configuration file (stored in reg$sleep) or defaults to a function with exponential backoff between 5 and 120 seconds.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [data.table] with columns “job.id” and “chunk”.

    Note

    If you have a large number of jobs, disabling the progress bar (options(batchtools.progress = FALSE)) can significantly increase the performance of submitJobs.

    Resources

    You can pass arbitrary resources to submitJobs() which then are available in the cluster function template. Some resources' names are standardized and it is good practice to stick to the following nomenclature to avoid confusion:

    walltime:

    Upper time limit in seconds for jobs before they get killed by the scheduler. Can be passed as additional column as part of ids to set per-job resources.

    memory:

    Memory limit in Mb. If jobs exceed this limit, they are usually killed by the scheduler. Can be passed as additional column as part of ids to set per-job resources.

    ncpus:

    Number of (physical) CPUs to use on the slave. Can be passed as additional column as part of ids to set per-job resources.

    omp.threads:

    Number of threads to use via OpenMP. Used to set environment variable “OMP_NUM_THREADS”. Can be passed as additional column as part of ids to set per-job resources.

    pp.size:

    Maximum size of the pointer protection stack, see Memory.

    blas.threads:

    Number of threads to use for the BLAS backend. Used to set environment variables “MKL_NUM_THREADS” and “OPENBLAS_NUM_THREADS”. Can be passed as additional column as part of ids to set per-job resources.

    measure.memory:

    Enable memory measurement for jobs. Comes with a small runtime overhead.

    chunks.as.arrayjobs:

    Execute chunks as array jobs.

    pm.backend:

    Start a parallelMap backend on the slave.

    foreach.backend:

    Start a foreach backend on the slave.

    clusters:

    Resource used for Slurm to select the set of clusters to run sbatch/squeue/scancel on.

    Chunking of Jobs

    Multiple jobs can be grouped (chunked) together to be executed sequentially on the batch system as a single batch job. This is especially useful to avoid overburdening the scheduler by submitting thousands of jobs simultaneously. To chunk jobs together, job ids must be provided as data.frame with columns “job.id” and “chunk” (integer). All jobs with the same chunk number will be executed sequentially inside the same batch job. The utility functions chunk, binpack and lpt can assist in grouping jobs.

    Array Jobs

    If your cluster supports array jobs, you can set the resource chunks.as.arrayjobs to TRUE in order to execute chunks as job arrays on the cluster. For each chunk of size n, batchtools creates a JobCollection of (possibly heterogeneous) jobs which is submitted to the scheduler as a single array job with n repetitions. For each repetition, the JobCollection is first read from the file system, then subsetted to the i-th job using the environment variable reg$cluster.functions$array.var (depending on the cluster backend, defined automatically) and finally executed.

    Order of Submission

    Jobs are submitted in the order of chunks, i.e. jobs which have chunk number sort(unique(ids$chunk))[1] first, then jobs with chunk number sort(unique(ids$chunk))[2] and so on. If no chunks are provided, jobs are submitted in the order of ids$job.id.

    Limiting the Number of Jobs

    If requested, submitJobs tries to limit the number of concurrent jobs of the user by waiting until jobs terminate before submitting new ones. This can be controlled by setting “max.concurrent.jobs” in the configuration file (see Registry) or by setting the resource “max.concurrent.jobs” to the maximum number of jobs to run simultaneously. If both are set, the setting via the resource takes precedence over the setting in the configuration.

    Measuring Memory

    Setting the resource measure.memory to TRUE turns on memory measurement: gc is called directly before and after the job and the difference is stored in the internal database. Note that this is just a rough estimate and works reliably neither for external code like C/C++ nor in combination with threading.

    Inner Parallelization

    Inner parallelization is typically done via threading, sockets or MPI. Two backends are supported to assist in setting up inner parallelization.

    The first package is parallelMap. If you set the resource “pm.backend” to “multicore”, “socket” or “mpi”, parallelStart is called on the slave before the first job in the chunk is started and parallelStop is called after the last job terminated. This way, the resources for inner parallelization can be set and get automatically stored just like other computational resources. The function provided by the user just has to call parallelMap to start parallelization using the preconfigured backend.

    To control the number of CPUs, you have to set the resource ncpus. Otherwise ncpus defaults to the number of available CPUs (as reported by detectCores) on the executing machine for multicore and socket mode and defaults to the return value of mpi.universe.size-1 for MPI. Your template must be set up to handle the parallelization, e.g. request the right number of CPUs or start R with mpirun. You may pass further options like level to parallelStart via the named list “pm.opts”.

    The second supported parallelization backend is foreach. If you set the resource “foreach.backend” to “seq” (sequential mode), “parallel” (doParallel) or “mpi” (doMPI), the requested foreach backend is automatically registered on the slave. Again, the resource ncpus is used to determine the number of CPUs.

    Neither the namespace of parallelMap nor the namespace of foreach is attached. You have to do this manually via library or let the registry load the packages for you.

    Examples

    batchtools:::example_push_temp(3) ### Example 1: Submit subsets of jobs tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg1' using cluster functions 'Interactive'
    # toy function which fails if x is even and an input file does not exist fun = function(x, fn) if (x %% 2 == 0 && !file.exists(fn)) stop("file not found") else x # define jobs via batchMap fn = tempfile() ids = batchMap(fun, 1:20, reg = tmp, fn = fn)
    #> Adding 20 jobs ...
    # submit some jobs ids = 1:10 submitJobs(ids, reg = tmp)
    #> Submitting 10 jobs in 10 chunks using cluster functions 'Interactive' ...
    #> Error in (function (x, fn) : file not found #> Error in (function (x, fn) : file not found #> Error in (function (x, fn) : file not found #> Error in (function (x, fn) : file not found #> Error in (function (x, fn) : file not found
    waitForJobs(ids, reg = tmp)
    #> [1] FALSE
    getStatus(reg = tmp)
    #> Status for 20 jobs at 2020-10-21 09:39:31: #> Submitted : 10 ( 50.0%) #> -- Queued : 0 ( 0.0%) #> -- Started : 10 ( 50.0%) #> ---- Running : 0 ( 0.0%) #> ---- Done : 5 ( 25.0%) #> ---- Error : 5 ( 25.0%) #> ---- Expired : 0 ( 0.0%)
    # create the required file and re-submit failed jobs file.create(fn)
    #> [1] TRUE
    submitJobs(findErrors(ids, reg = tmp), reg = tmp)
    #> Submitting 5 jobs in 5 chunks using cluster functions 'Interactive' ...
    getStatus(reg = tmp)
    #> Status for 20 jobs at 2020-10-21 09:39:31: #> Submitted : 10 ( 50.0%) #> -- Queued : 0 ( 0.0%) #> -- Started : 10 ( 50.0%) #> ---- Running : 0 ( 0.0%) #> ---- Done : 10 ( 50.0%) #> ---- Error : 0 ( 0.0%) #> ---- Expired : 0 ( 0.0%)
    # submit remaining jobs which have not yet been submitted ids = findNotSubmitted(reg = tmp) submitJobs(ids, reg = tmp)
    #> Submitting 10 jobs in 10 chunks using cluster functions 'Interactive' ...
    getStatus(reg = tmp)
    #> Status for 20 jobs at 2020-10-21 09:39:31: #> Submitted : 20 (100.0%) #> -- Queued : 0 ( 0.0%) #> -- Started : 20 (100.0%) #> ---- Running : 0 ( 0.0%) #> ---- Done : 20 (100.0%) #> ---- Error : 0 ( 0.0%) #> ---- Expired : 0 ( 0.0%)
    # collect results reduceResultsList(reg = tmp)
    #> [[1]] #> [1] 1 #> #> [[2]] #> [1] 2 #> #> [[3]] #> [1] 3 #> #> [[4]] #> [1] 4 #> #> [[5]] #> [1] 5 #> #> [[6]] #> [1] 6 #> #> [[7]] #> [1] 7 #> #> [[8]] #> [1] 8 #> #> [[9]] #> [1] 9 #> #> [[10]] #> [1] 10 #> #> [[11]] #> [1] 11 #> #> [[12]] #> [1] 12 #> #> [[13]] #> [1] 13 #> #> [[14]] #> [1] 14 #> #> [[15]] #> [1] 15 #> #> [[16]] #> [1] 16 #> #> [[17]] #> [1] 17 #> #> [[18]] #> [1] 18 #> #> [[19]] #> [1] 19 #> #> [[20]] #> [1] 20 #>
    ### Example 2: Using memory measurement tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg2' using cluster functions 'Interactive'
    # Toy function which creates a large matrix and returns the column means fun = function(n, p) colMeans(matrix(runif(n*p), n, p)) # Arguments to fun: args = data.table::CJ(n = c(1e4, 1e5), p = c(10, 50)) # like expand.grid() print(args)
    #> n p #> 1: 1e+04 10 #> 2: 1e+04 50 #> 3: 1e+05 10 #> 4: 1e+05 50
    # Map function to create jobs ids = batchMap(fun, args = args, reg = tmp)
    #> Adding 4 jobs ...
    # Set resources: enable memory measurement res = list(measure.memory = TRUE) # Submit jobs using the currently configured cluster functions submitJobs(ids, resources = res, reg = tmp)
    #> Submitting 4 jobs in 4 chunks using cluster functions 'Interactive' ...
    # Retrieve information about memory, combine with parameters info = ijoin(getJobStatus(reg = tmp)[, .(job.id, mem.used)], getJobPars(reg = tmp)) print(unwrap(info))
    #> job.id mem.used n p #> 1: 1 145.3133 1e+04 10 #> 2: 2 145.3144 1e+04 50 #> 3: 3 145.3146 1e+05 10 #> 4: 4 145.3153 1e+05 50
    # Combine job info with results -> each job is aggregated using mean() unwrap(ijoin(info, reduceResultsDataTable(fun = function(res) list(res = mean(res)), reg = tmp)))
    #> job.id mem.used n p res #> 1: 1 145.3133 1e+04 10 0.5005778 #> 2: 2 145.3144 1e+04 50 0.4992527 #> 3: 3 145.3146 1e+05 10 0.5000026 #> 4: 4 145.3153 1e+05 50 0.4999301
    ### Example 3: Multicore execution on the slave tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg3' using cluster functions 'Interactive'
    # Function which sleeps 10 seconds, i-times f = function(i) { parallelMap::parallelMap(Sys.sleep, rep(10, i)) } # Create one job with parameter i=4 ids = batchMap(f, i = 4, reg = tmp)
    #> Adding 1 jobs ...
    # Set resources: Use parallelMap in multicore mode with 4 CPUs # batchtools internally loads the namespace of parallelMap and then # calls parallelStart() before the job and parallelStop() right # after the last job in the chunk has terminated. res = list(pm.backend = "multicore", ncpus = 4) if (FALSE) { # Submit the job and wait for it submitJobs(resources = res, reg = tmp) waitForJobs(reg = tmp) # If successful, the running time should be ~10s getJobTable(reg = tmp)[, .(job.id, time.running)] # There should also be a note in the log: grepLogs(pattern = "parallelMap", reg = tmp) }

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/summarizeExperiments.html ================================================ Quick Summary over Experiments — summarizeExperiments • batchtools

    Returns a frequency table of defined experiments. See ExperimentRegistry for an example.

    summarizeExperiments(
      ids = NULL,
      by = c("problem", "algorithm"),
      reg = getDefaultRegistry()
    )

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.

    by

    [character]
    Split the resulting table by columns of getJobPars.

    reg

    [ExperimentRegistry]
    Registry. If not explicitly passed, uses the last created registry.

    Value

    [data.table] of frequencies.

    See also

    Other Experiment: addExperiments(), removeExperiments()

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/sweepRegistry.html ================================================ Check Consistency and Remove Obsolete Information — sweepRegistry • batchtools

    Canceled jobs and jobs submitted multiple times may leave stray files behind. This function checks the registry for consistency and removes obsolete files and redundant data base entries.

    sweepRegistry(reg = getDefaultRegistry())

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/syncRegistry.html ================================================ Synchronize the Registry — syncRegistry • batchtools

    Parses update files written by the slaves to the file system and updates the internal data base.

    syncRegistry(reg = getDefaultRegistry())

    Arguments

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [logical(1)]: TRUE if the state has changed, FALSE otherwise.

    See also

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/testJob.html ================================================ Run Jobs Interactively — testJob • batchtools

    Starts a single job on the local machine.

    testJob(id, external = FALSE, reg = getDefaultRegistry())

    Arguments

    id

    [integer(1) or data.table]
    Single integer to specify the job or a data.table with column job.id and exactly one row.

    external

    [logical(1)]
    Run the job in an external R session? If TRUE, starts a fresh R session on the local machine to execute the job with execJob. You will not be able to use debug tools like traceback or browser.

    If external is set to FALSE (default) on the other hand, testJob will execute the job in the current R session and the usual debugging tools work. However, spotting missing variable declarations (as they are possibly resolved in the global environment) is impossible. Same holds for missing package dependency declarations.

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    Returns the result of the job if successful.

    See also

    Examples

    batchtools:::example_push_temp(1) tmp = makeRegistry(file.dir = NA, make.default = FALSE)
    #> No readable configuration file found
    #> Created registry in '/tmp/batchtools-example/reg' using cluster functions 'Interactive'
    batchMap(function(x) if (x == 2) xxx else x, 1:2, reg = tmp)
    #> Adding 2 jobs ...
    testJob(1, reg = tmp)
    #> ### [bt]: Setting seed to 11688 ...
    #> [1] 1
    if (FALSE) { testJob(2, reg = tmp) }

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/unwrap.html ================================================ Unwrap Nested Data Frames — unwrap • batchtools

    Some functions (e.g., getJobPars, getJobResources or reduceResultsDataTable) return a data.table with columns of type list. These columns can be unnested/unwrapped with this function. The contents of these columns will be transformed to a data.table and cbind-ed to the input data.frame x, replacing the original nested column.

    unwrap(x, cols = NULL, sep = NULL)
    
    flatten(x, cols = NULL, sep = NULL)

    Arguments

    x

    [data.frame | data.table]
    Data frame to flatten.

    cols

    [character]
    Columns to consider for this operation. If set to NULL (default), will operate on all columns of type “list”.

    sep

    [character(1)]
    If NULL (default), the column names of the additional columns will re-use the names of the nested list/data.frame. This may lead to name clashes. If you provide sep, the variable column name will be constructed as “[column name of x][sep][inner name]”.

    Value

    [data.table].

    Note

    There is a name clash with the function flatten in package purrr. For this reason, using flatten is discouraged in favor of unwrap.

    Examples

    x = data.table::data.table( id = 1:3, values = list(list(a = 1, b = 3), list(a = 2, b = 2), list(a = 3)) ) unwrap(x)
    #> id a b #> 1: 1 1 3 #> 2: 2 2 2 #> 3: 3 3 NA
    unwrap(x, sep = ".")
    #> id values.a values.b #> 1: 1 1 3 #> 2: 2 2 2 #> 3: 3 3 NA

    Site built with pkgdown 1.6.1.

    ================================================ FILE: docs/reference/waitForJobs.html ================================================ Wait for Termination of Jobs — waitForJobs • batchtools

    This function simply waits until all jobs are terminated.

    waitForJobs(
      ids = NULL,
      sleep = NULL,
      timeout = 604800,
      expire.after = NULL,
      stop.on.error = FALSE,
      stop.on.expire = FALSE,
      reg = getDefaultRegistry()
    )

    Arguments

    ids

    [data.frame or integer]
    A data.frame (or data.table) with a column named “job.id”. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of findSubmitted. Invalid ids are ignored.

    sleep

    [function(i) | numeric(1)]
    Parameter to control the duration to sleep between queries. You can pass an absolute numeric value in seconds or a function(i) which returns the number of seconds to sleep in the i-th iteration. If not provided (NULL), tries to read the value (number/function) from the configuration file (stored in reg$sleep) or defaults to a function with exponential backoff between 5 and 120 seconds.

    timeout

    [numeric(1)]
    After waiting timeout seconds, show a message and return FALSE. This argument may be required on some systems where, e.g., expired jobs or jobs on hold are problematic to detect. If you don't want a timeout, set this to Inf. Default is 604800 (one week).

    expire.after

    [integer(1)]
    Jobs count as “expired” if they are not found on the system but have not communicated back their results (or error message). This frequently happens on managed systems if the scheduler kills a job because the job has hit the walltime or requested more memory than reserved. On the other hand, network file systems often require several seconds for new files to be found, which can lead to false positives in the detection heuristic. waitForJobs treats such jobs as expired after they have not been detected on the system for expire.after iterations. If not provided (NULL), tries to read the value from the configuration file (stored in reg$expire.after), and finally defaults to 3.

    stop.on.error

    [logical(1)]
    Immediately cancel if a job terminates with an error? Default is FALSE.

    stop.on.expire

    [logical(1)]
    Immediately cancel if jobs are detected to be expired? Default is FALSE. Expired jobs will then be ignored for the remainder of waitForJobs().

    reg

    [Registry]
    Registry. If not explicitly passed, uses the default registry (see setDefaultRegistry).

    Value

    [logical(1)]. Returns TRUE if all jobs terminated successfully and FALSE if either the timeout is reached or at least one job terminated with an exception or expired.

    Site built with pkgdown 1.6.1.

    ================================================ FILE: inst/CITATION ================================================ ## -*- mode: r -*- citHeader("To cite BatchJobs, BatchExperiments or batchtools in publications use:") bibentry("Article", title = "batchtools: Tools for R to work on batch systems", author = c(as.person("Michel Lang"), as.person("Bernd Bischl"), as.person("Dirk Surmann")), journal = "The Journal of Open Source Software", year = "2017", month = "feb", number = "10", doi = "10.21105/joss.00135", url = "https://doi.org/10.21105/joss.00135", textVersion = paste("Michel Lang, Bernd Bischl, Dirk Surmann (2017).", "batchtools: Tools for R to work on batch systems.", "The Journal of Open Source Software, 2(10).", "URL https://doi.org/10.21105/joss.00135.") ) bibentry("Article", title = "{BatchJobs} and {BatchExperiments}: Abstraction Mechanisms for Using {R} in Batch Environments", author = c(as.person("Bernd Bischl"), as.person("Michel Lang"), as.person("Olaf Mersmann"), as.person("J{\\\"o}rg Rahnenf{\\\"u}hrer"), as.person("Claus Weihs")), journal = "Journal of Statistical Software", year = "2015", volume = "64", number = "11", pages = "1--25", doi = "10.18637/jss.v064.i11", url = "https://www.jstatsoft.org/v64/i11/", textVersion = paste("Bernd Bischl, Michel Lang, Olaf Mersmann, Joerg Rahnenfuehrer, Claus Weihs (2015).", "BatchJobs and BatchExperiments: Abstraction Mechanisms for Using R in Batch Environments.", "Journal of Statistical Software, 64(11), 1-25.", "URL https://www.jstatsoft.org/v64/i11/.") ) ================================================ FILE: inst/bin/linux-helper ================================================ #!/bin/bash ## linux-helper: Helper for the multicore and SSH cluster functions of the BatchJobs R ## package. ## ## Requires the following Unix command line utilities: ## ## * grep, wc, ps, kill, uptime, echo, cat, possibly setsid ## ## The following commands are implemented. First argument is always the command name. 
## For other arguments see below. Each command returns a character vector. ## ## number-of-cpus ## Return the number of PEs on worker. ## ## start-job NICE JOBFILE OUTFILE ## Start an Rscript process running doJobCollection on $JOBFILE and log ## the output in $OUTFILE. ## Returns: PID of sh process which spawned R. We use that as batch.id. ## ## kill-job PID ## Kill the R job with PID $PID. The PID is the PID of ## the sh process returned by start-job. ## First a TERM is sent, then 1 sec delay, then KILL. ## ## status FILEDIR ## Return 4 numbers: ## - load average of last 1 min, as given by e.g. uptime ## - number of R processes by _all_ users ## - number of R processes by _all_ users which have a load of >= 50% ## - number of R processes by current user ## which match $FILEDIR/jobs in the cmd call of R ## ## list-jobs FILEDIR ## Return the PIDs of running R jobs operating on $FILEDIR/jobs. echo "[bt] --BOF--" CMD="$1"; shift export LC_ALL=C ### Avoid any localization issues. shopt -s nocasematch ### Case insensitive regular expressions case $CMD in number-of-cpus) if [[ `uname` =~ "Linux" ]]; then NCPU=`cat /proc/cpuinfo | grep '^processor' | wc -l` else ## darwin NCPU=`sysctl -n hw.ncpu` fi echo "[bt]" $NCPU ;; start-job) Rscript -e "batchtools::doJobCollection('$1')" > "$2" 2>&1 & echo "[bt]" $! 
;; kill-job) kill -TERM $1 > /dev/null 2> /dev/null sleep 1 kill -KILL $1 > /dev/null 2> /dev/null exit 0 ;; status) # remove everyting till load average(s), then delete commas LOAD=$(uptime | awk '{gsub(/.*:/,""); {gsub(/,/,"")}; print $1}') JOBDIR="$1/jobs" # print 3 columns for all processes # use ww for unlimited width in ps for command output # we count all R procs, all R50, and all where JOBDIR was in the call args ps -e -ww -o pcpu= -o ucomm= -o command= | \ awk -v j=$JOBDIR -v sysload=$LOAD ' BEGIN {rprocs=0;rprocs_50=0;njobs=0} $2 != "R" {next} {rprocs++} $1 > 50.0 {rprocs_50++} $0 ~ j {njobs++} END {print "[bt] " sysload " " rprocs " " rprocs_50 " " njobs}' ;; list-jobs) JOBDIR="$1/jobs" ps -e -ww -o pid= -o ucomm= -o command= | awk -v j=$JOBDIR '$2 == "R" && $0 ~ j { print "[bt] " $1 }' ;; *) esac echo "[bt] --EOF--" ================================================ FILE: inst/templates/lsf-simple.tmpl ================================================ ## Default resources can be set in your .batchtools.conf.R by defining the variable ## 'default.resources' as a named list. #BSUB -J <%= job.name %> # Name of the job #BSUB -o <%= log.file %> # Output is sent to logfile, stdout + stderr by default #BSUB -q <%= resources$queue %> # Job queue #BSUB -W <%= round(resources$walltime / 60, 1) %> # Walltime (LSF requires minutes, batchtools uses seconds) #BSUB -M <%= resources$memory %> # Memory requirements, e.g. "5000KB", "500MB", "5GB" etc. 
## Export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> <%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> <%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> <%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> Rscript -e 'batchtools::doJobCollection("<%= uri %>")' ================================================ FILE: inst/templates/openlava-simple.tmpl ================================================ ## Default resources can be set in your .batchtools.conf.R by defining the variable ## 'default.resources' as a named list. ## Remove [*] if arrayjobs are not supported #BSUB-J <%= job.name %>[1-<%= nrow(jobs) %>] # name of the job / number of jobs in chunk #BSUB-o <%= log.file %> # output is sent to logfile, stdout + stderr by default #BSUB-q <%= resources$queue %> # Job queue #BSUB-W <%= resources$walltime %> # Walltime in minutes #BSUB-M <%= resources$memory %> # Memory requirements in Kbytes ## Export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> <%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> <%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> <%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> Rscript -e 'batchtools::doJobCollection("<%= uri %>")' ================================================ FILE: inst/templates/sge-simple.tmpl ================================================ #!/bin/bash ## The name of the job, can be anything, simply used when displaying the list of running jobs #$ -N <%= job.name %> ## Combining output/error messages into one file #$ -j y ## Giving the name of the output log file #$ -o <%= log.file %> ## One needs to tell the queue system to use the current directory as the working directory ## Or else the script may fail as it will execute in your top level home directory /home/username #$ -cwd ## Use environment variables #$ -V ## Use correct 
queue #$ -q <%= resources$queue %> ## Export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> <%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> <%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> <%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> Rscript -e 'batchtools::doJobCollection("<%= uri %>")' exit 0 ================================================ FILE: inst/templates/slurm-dortmund.tmpl ================================================ #!/bin/bash <% backend = resources$pm.backend %??% "local" ncpus = resources$ncpus %??% 1L walltime = asInt(resources$walltime, lower = 1L, upper = 172800L) memory = asInt(resources$memory, lower = 100L, upper = 64000L) if (backend == "mpi") { cmd = "mpirun -np 1 Rscript" mincpus = 2L } else { cmd = "Rscript" mincpus = 1L } # relative paths are not handled well by Slurm log.file = fs::path_expand(log.file) -%> #SBATCH --job-name=<%= job.name %> #SBATCH --output=<%= log.file %> #SBATCH --error=<%= log.file %> #SBATCH --time=<%= ceiling(walltime / 60L)%> #SBATCH --ntasks=<%= if (backend == "mpi") ncpus else 1L %> #SBATCH --mincpus=<%= mincpus %> #SBATCH --cpus-per-task=<%= if (backend == "mpi") 1L else ncpus %> #SBATCH --mem-per-cpu=<%= memory %> #SBATCH --partition=all mkdir /tmp/${USER}-${SLURM_JOBID} export TMPDIR=/tmp/${USER}-${SLURM_JOBID} ## Export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> <%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> <%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> <%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> source /etc/profile <%= cmd %> -e 'batchtools::doJobCollection("<%= uri %>")' rm -rf /tmp/${USER}-${SLURM_JOBID} ================================================ FILE: inst/templates/slurm-lido3.tmpl ================================================ #!/bin/bash ## Job Resource Interface 
Definition ## ## ncpus [integer(1)]: Number of required cpus per task, ## Set larger than 1 if you want to further parallelize ## with multicore/parallel within each task. ## walltime [integer(1)]: Walltime for this job, in seconds. ## Must be at least 1 minute. ## memory [integer(1)]: Memory in megabytes for each cpu. ## Must be at least 100 (when I tried lower values my ## jobs did not start at all). ## ## Default resources can be set in your .batchtools.conf.R by defining the variable ## 'default.resources' as a named list. <% # queue walltime = asInt(resources$walltime, lower = 60L, upper = 31L * 24L * 60L * 60L) memory = asInt(resources$memory, lower = 100L, upper = 1024L * 1024L) walltimes = c(2L, 8L, 48L, 672L) * 3600L queue = c("short", "med", "long", "ultralong")[wf(walltime <= walltimes)] ncpus = if (!is.null(resources$ncpus)) ncpus = assertInt(resources$ncpus, lower = 1L) else 1L # modules modules = paste(resources$modules, resources$R) # cli args cli.args = "" if (!is.null(resources$pp.size)) cli.args = sprintf("--max-ppsize=%i", assertInt(pp.size, upper = 500000L)) -%> #SBATCH --job-name=<%= job.name %> #SBATCH --output=<%= log.file %> #SBATCH --error=<%= log.file %> #SBATCH --time=<%= ceiling(walltime / 60L) %> #SBATCH --partition=<%= queue %> #SBATCH --cpus-per-task=<%= ncpus %> #SBATCH --mem-per-cpu=<%= memory %> <%= if (array.jobs) sprintf("#SBATCH --array=1-%i", nrow(jobs)) else "" %> ## Initialize work environment like module add <%= modules %> ## Export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> ## Use /scratch on the node, TMPDIR is mounted as tmpfs export TMPDIR=/scratch/${USER}/${SLURM_JOBID} mkdir -p ${TMPDIR} ## Run R: ## we merge R output with stdout from SLURM, which gets then logged via --output option Rscript <%= cli.args -%> -e 'batchtools::doJobCollection("<%= uri %>")' ================================================ FILE: inst/templates/slurm-simple.tmpl 
================================================ #!/bin/bash ## Job Resource Interface Definition ## ## ntasks [integer(1)]: Number of required tasks, ## Set larger than 1 if you want to further parallelize ## with MPI within your job. ## ncpus [integer(1)]: Number of required cpus per task, ## Set larger than 1 if you want to further parallelize ## with multicore/parallel within each task. ## walltime [integer(1)]: Walltime for this job, in seconds. ## Must be at least 60 seconds for Slurm to work properly. ## memory [integer(1)]: Memory in megabytes for each cpu. ## Must be at least 100 (when I tried lower values my ## jobs did not start at all). ## ## Default resources can be set in your .batchtools.conf.R by defining the variable ## 'default.resources' as a named list. <% # relative paths are not handled well by Slurm log.file = fs::path_expand(log.file) -%> #SBATCH --job-name=<%= job.name %> #SBATCH --output=<%= log.file %> #SBATCH --error=<%= log.file %> #SBATCH --time=<%= ceiling(resources$walltime / 60) %> #SBATCH --ntasks=1 #SBATCH --cpus-per-task=<%= resources$ncpus %> #SBATCH --mem-per-cpu=<%= resources$memory %> <%= if (!is.null(resources$partition)) sprintf(paste0("#SBATCH --partition='", resources$partition, "'")) %> <%= if (array.jobs) sprintf("#SBATCH --array=1-%i", nrow(jobs)) else "" %> ## Initialize work environment like ## source /etc/profile ## module add ... 
## Export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> <%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> <%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> <%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> ## Run R: ## we merge R output with stdout from SLURM, which gets then logged via --output option Rscript -e 'batchtools::doJobCollection("<%= uri %>")' ================================================ FILE: inst/templates/testJob.tmpl ================================================ options(warn = 1L) Sys.setenv(DEBUGME = "<%= Sys.getenv('DEBUGME') %>") requireNamespace("batchtools", quietly = TRUE) jc = force(readRDS("<%= jc %>")) setwd(jc$work.dir) batchtools:::loadRegistryDependencies(jc, must.work = TRUE) res = batchtools::execJob(jc) saveRDS(res, file = "<%= result %>", version = 2L) quit(save = "no", status = 0L) # vim: ft=r ================================================ FILE: inst/templates/torque-lido.tmpl ================================================ #!/bin/bash <% ## Check some resources and set sane defaults resources$walltime = asInt(resources$walltime, lower = 60L, upper = 3600L * 672L) resources$memory = asInt(resources$memory, lower = 100L, upper = 64L * 1024L) resources$ncpus = if (is.null(resources$ncpus)) 1L else asInt(resources$ncpus, lower = 1L) resources$modules = if (is.null(resources$modules)) character(0L) else assertCharacter(resources$modules, any.missing = FALSE) resources$R = if (is.null(resources$R)) "R/3.4.1-gcc49-base" else assertString(resources$R) resources$omp.threads = if (is.null(resources$omp.threads)) 1L else asInt(resources$omp.threads, lower = 1L) resources$blas.threads = if (is.null(resources$blas.threads)) 1L else asInt(resources$blas.threads, lower = 1L) if (!is.null(resources$type)) assertString(resources$type) if (resources$memory > 15000) resources$type = "quad" use.mpi = (resources$pm.backend %??% "default") == 
"mpi" ## first string of queue, selected by walltime walltimes = 3600L * c(1L, 8L, 48L, 672L) queue = c("short", "med", "long", "ultralong")[wf(resources$walltime <= walltimes)] ## check default modules modules.default = c(binutils = "binutils/2.25", gcc = "gcc/4.9.3", openblas = "openblas/0.2.17") modules = resources$modules for (i in seq_along(modules.default)) { if (!any(grepl(paste0("^", names(modules.default[i]), "/?[0-9\\.]*$"), modules))) { modules = paste(modules, modules.default[i]) } } ## add R modules = paste(modules, resources$R) ## add mpi if (use.mpi) modules = paste(modules, "openmpi/gcc4.9.x") ## very ugly hack because we cannot log to data (nobackup) filesystem on lido, ## only home fs is available ## unfortunately there seems to be no generic solution ## does log path start with /data/? log.file = log.file if (length(grep("^/data/", log.file)) > 0L) { ## strip that log.file = substr(log.file, 7L, nchar(log.file)) ## find next forward slash i = regexpr("/", log.file) if (i != -1) { ## this must be "user": e.g. /data/bischl/... 
user = substr(log.file, 1L, i-1L) ## put together log.file = sprintf("/home/%s/nobackup%s", user, substr(log.file, i, nchar(log.file))) } } -%> #PBS -N <%= job.name %> #PBS -o <%= log.file %> #PBS -l walltime=<%= resources$walltime %>,nodes=<%= if (use.mpi) resources$ncpus else 1 %>:ppn=<%= if (use.mpi) 1 else resources$ncpus %><%= if (!is.null(resources$type)) paste0(":", resources$type) %>,vmem=<%= resources$memory %>M #PBS -q <%= queue %> #PBS -j oe <%= if (array.jobs) sprintf("#PBS -t 1-%i", nrow(jobs)) else "" %> ## setup modules source /sysdata/shared/sfw/Modules/default/init/bash module add <%= modules %> ## create our own temp dir (and clean it up later), lido does not do this automatically mkdir /scratch/${USER}-${PBS_JOBID} export TMPDIR=/scratch/${USER}-${PBS_JOBID} <%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> <%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> <%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> ## export value of DEBUGME environemnt var to slave export DEBUGME=<%= Sys.getenv("DEBUGME") %> ## run R <%= if (use.mpi) "mpirun -np 1 " else "" %>Rscript -e 'batchtools::doJobCollection("<%= uri %>")' ## Cleanup rm -rf /scratch/${USER}-${PBS_JOBID} ================================================ FILE: man/JobCollection.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/JobCollection.R \name{makeJobCollection} \alias{makeJobCollection} \alias{JobCollection} \title{JobCollection Constructor} \usage{ makeJobCollection(ids = NULL, resources = list(), reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. 
    Invalid ids are ignored.} \item{resources}{[\code{list}]\cr Named list of resources. Default is \code{list()}.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{JobCollection}]. } \description{ \code{makeJobCollection} takes multiple job ids and creates an object of class \dQuote{JobCollection} which holds all necessary information for the calculation with \code{\link{doJobCollection}}. It is implemented as an environment with the following variables: \describe{ \item{file.dir}{\code{file.dir} of the \link{Registry}.} \item{work.dir}{\code{work.dir} of the \link{Registry}.} \item{job.hash}{Unique identifier of the job. Used to create names on the file system.} \item{jobs}{\code{\link[data.table]{data.table}} holding individual job information. See examples.} \item{log.file}{Location of the designated log file for this job.} \item{resources}{Named list of specified computational resources.} \item{uri}{Location of the job description file (saved with \code{\link[base]{saveRDS}}) on the file system.} \item{seed}{\code{integer(1)} Seed of the \link{Registry}.} \item{packages}{\code{character} with required packages to load via \code{\link[base]{require}}.} \item{namespaces}{\code{character} with required packages to load via \code{\link[base]{requireNamespace}}.} \item{source}{\code{character} with list of files to source before execution.} \item{load}{\code{character} with list of files to load before execution.} \item{array.var}{\code{character(1)} of the array environment variable specified by the cluster functions.} \item{array.jobs}{\code{logical(1)} signaling if jobs were submitted using \code{chunks.as.arrayjobs}.} } If your \link{ClusterFunctions} uses a template, \code{\link[brew]{brew}} will be executed in the environment of such a collection. Thus all variables available inside the job can be used in the template. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE, packages = "methods") batchMap(identity, 1:5, reg = tmp) # resources are usually set in submitJobs() jc = makeJobCollection(1:3, resources = list(foo = "bar"), reg = tmp) ls(jc) jc$resources } \seealso{ Other JobCollection: \code{\link{doJobCollection}()} } \concept{JobCollection} ================================================ FILE: man/JobExperiment.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Job.R \name{makeJob} \alias{makeJob} \alias{Job} \alias{Experiment} \title{Jobs and Experiments} \usage{ makeJob(id, reader = NULL, reg = getDefaultRegistry()) } \arguments{ \item{id}{[\code{integer(1)} or \code{data.table}]\cr Single integer to specify the job or a \code{data.table} with column \code{job.id} and exactly one row.} \item{reader}{[\code{RDSReader} | \code{NULL}]\cr Reader object to retrieve files. Used internally to cache reading from the file system. The default (\code{NULL}) does not make use of caching.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{Job} | \code{Experiment}]. } \description{ Jobs and Experiments are abstract objects which hold all information necessary to execute a single computational job for a \code{\link{Registry}} or \code{\link{ExperimentRegistry}}, respectively. They can be created using the constructor \code{makeJob} which takes a single job id. Jobs and Experiments are passed to reduce functions like \code{\link{reduceResults}}. Furthermore, Experiments can be used in the functions of the \code{\link{Problem}} and \code{\link{Algorithm}}. Jobs and Experiments hold these information: \describe{ \item{\code{job.id}}{Job ID as integer.} \item{\code{pars}}{ Job parameters as named list. 
For \code{\link{ExperimentRegistry}}, the parameters are divided into the sublists \dQuote{prob.pars} and \dQuote{algo.pars}. } \item{\code{seed}}{Seed which is set via \code{\link{doJobCollection}} as scalar integer.} \item{\code{resources}}{Computational resources which were set for this job as named list.} \item{\code{external.dir}}{ Path to a directory which is created exclusively for this job. You can store external files here. Directory is persistent between multiple restarts of the job and can be cleaned by calling \code{\link{resetJobs}}. } \item{\code{fun}}{Job only: User function passed to \code{\link{batchMap}}.} \item{\code{prob.name}}{Experiments only: Problem id.} \item{\code{algo.name}}{Experiments only: Algorithm id.} \item{\code{problem}}{Experiments only: \code{\link{Problem}}.} \item{\code{instance}}{Experiments only: Problem instance.} \item{\code{algorithm}}{Experiments only: \code{\link{Algorithm}}.} \item{\code{repl}}{Experiments only: Replication number.} } Note that the slots \dQuote{pars}, \dQuote{fun}, \dQuote{algorithm} and \dQuote{problem} lazy-load required files from the file system and construct the object on the first access. The realizations are cached for all slots except \dQuote{instance} (which might be stochastic). Jobs and Experiments can be executed manually with \code{\link{execJob}}. 
    } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(function(x, y) x + y, x = 1:2, more.args = list(y = 99), reg = tmp) submitJobs(resources = list(foo = "bar"), reg = tmp) job = makeJob(1, reg = tmp) print(job) # Get the parameters: job$pars # Get the job resources: job$resources # Execute the job locally: execJob(job) } ================================================ FILE: man/JobNames.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/JobNames.R \name{JobNames} \alias{JobNames} \alias{setJobNames} \alias{getJobNames} \title{Set and Retrieve Job Names} \usage{ setJobNames(ids = NULL, names, reg = getDefaultRegistry()) getJobNames(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.} \item{names}{[\code{character}]\cr Character vector of the same length as provided ids.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ \code{setJobNames} returns \code{NULL} invisibly, \code{getJobNames} returns a \code{data.table} with columns \code{job.id} and \code{job.name}. } \description{ Set custom names for jobs. These are passed to the template as \sQuote{job.name}. If no custom name is set (or any of the job names of the chunk is missing), the job hash is used as job name. Individual job names can be accessed via \code{jobs$job.name}. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) ids = batchMap(identity, 1:10, reg = tmp) setJobNames(ids, letters[1:nrow(ids)], reg = tmp) getJobNames(reg = tmp) } ================================================ FILE: man/JoinTables.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Joins.R \name{JoinTables} \alias{JoinTables} \alias{ijoin} \alias{ljoin} \alias{rjoin} \alias{ojoin} \alias{sjoin} \alias{ajoin} \alias{ujoin} \title{Inner, Left, Right, Outer, Semi and Anti Join for Data Tables} \usage{ ijoin(x, y, by = NULL) ljoin(x, y, by = NULL) rjoin(x, y, by = NULL) ojoin(x, y, by = NULL) sjoin(x, y, by = NULL) ajoin(x, y, by = NULL) ujoin(x, y, all.y = FALSE, by = NULL) } \arguments{ \item{x}{[\code{\link{data.frame}}]\cr First data.frame to join.} \item{y}{[\code{\link{data.frame}}]\cr Second data.frame to join.} \item{by}{[\code{character}]\cr Column name(s) of variables used to match rows in \code{x} and \code{y}. If not provided, a heuristic similar to the one described in the \pkg{dplyr} vignette is used: \enumerate{ \item If \code{x} is keyed, the existing key will be used if \code{y} has the same column(s). \item If \code{x} is not keyed, the intersection of common column names is used if not empty. \item Raise an exception. } You may pass a named character vector to merge on columns with different names in \code{x} and \code{y}: \code{by = c("x.id" = "y.id")} will match \code{x}'s \dQuote{x.id} column with \code{y}'s \dQuote{y.id} column.} \item{all.y}{[logical(1)]\cr Keep columns of \code{y} which are not in \code{x}?} } \value{ [\code{\link[data.table]{data.table}}] with key identical to \code{by}. } \description{ These helper functions perform join operations on data tables. Most of them are basically one-liners.
See \url{https://rpubs.com/ronasta/join_data_tables} for an overview of join operations in data table or alternatively \pkg{dplyr}'s vignette on two table verbs. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } # Create two tables for demonstration tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(identity, x = 1:6, reg = tmp) x = getJobPars(reg = tmp) y = findJobs(x >= 2 & x <= 5, reg = tmp) y$extra.col = head(letters, nrow(y)) # Inner join: similar to intersect(): keep all columns of x and y with common matches ijoin(x, y) # Left join: use all ids from x, keep all columns of x and y ljoin(x, y) # Right join: use all ids from y, keep all columns of x and y rjoin(x, y) # Outer join: similar to union(): keep all columns of x and y with matches in x or y ojoin(x, y) # Semi join: filter x with matches in y sjoin(x, y) # Anti join: filter x with matches not in y ajoin(x, y) # Updating join: Replace values in x with values in y ujoin(x, y) } ================================================ FILE: man/Tags.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Tags.R \name{Tags} \alias{Tags} \alias{addJobTags} \alias{removeJobTags} \alias{getUsedJobTags} \title{Add or Remove Job Tags} \usage{ addJobTags(ids = NULL, tags, reg = getDefaultRegistry()) removeJobTags(ids = NULL, tags, reg = getDefaultRegistry()) getUsedJobTags(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.} \item{tags}{[\code{character}]\cr Tags to add or remove as strings.
Each tag may consist of letters, numbers, underscore and dots (pattern \dQuote{^[[:alnum:]_.]+}).} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with job ids affected (invisible). } \description{ Add and remove arbitrary tags to jobs. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) ids = batchMap(sqrt, x = -3:3, reg = tmp) # Add new tag to all ids addJobTags(ids, "needs.computation", reg = tmp) getJobTags(reg = tmp) # Add more tags addJobTags(findJobs(x < 0, reg = tmp), "x.neg", reg = tmp) addJobTags(findJobs(x > 0, reg = tmp), "x.pos", reg = tmp) getJobTags(reg = tmp) # Submit first 5 jobs and remove tag if successful ids = submitJobs(1:5, reg = tmp) if (waitForJobs(reg = tmp)) removeJobTags(ids, "needs.computation", reg = tmp) getJobTags(reg = tmp) # Grep for warning message and add a tag addJobTags(grepLogs(pattern = "NaNs produced", reg = tmp), "div.zero", reg = tmp) getJobTags(reg = tmp) # All tags where tag x.neg is set: ids = findTagged("x.neg", reg = tmp) getUsedJobTags(ids, reg = tmp) } ================================================ FILE: man/Worker.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Worker.R \docType{class} \name{Worker} \alias{Worker} \title{Create a Linux-Worker} \format{ An \code{\link[R6]{R6Class}} generator object } \value{ [\code{\link{Worker}}]. } \description{ \code{\link[R6]{R6Class}} to create local and remote linux workers. } \section{Fields}{ \describe{ \item{\code{nodename}}{Host name. Set via constructor.} \item{\code{ncpus}}{Number of CPUs. Set via constructor and defaults to a heuristic which tries to detect the number of CPUs of the machine.} \item{\code{max.load}}{Maximum load average (of the last 5 min). 
Set via constructor and defaults to the number of CPUs of the machine.} \item{\code{status}}{Status of the worker; one of \dQuote{unknown}, \dQuote{available}, \dQuote{max.cpus} and \dQuote{max.load}.} }} \section{Methods}{ \describe{ \item{\code{new(nodename, ncpus, max.load)}}{Constructor.} \item{\code{update(reg)}}{Update the worker status.} \item{\code{list(reg)}}{List running jobs.} \item{\code{start(reg, fn, outfile)}}{Start job collection in file \dQuote{fn} and output to \dQuote{outfile}.} \item{\code{kill(reg, batch.id)}}{Kill job matching the \dQuote{batch.id}.} } } \examples{ \dontrun{ # create a worker for the local machine and use 4 CPUs. Worker$new("localhost", ncpus = 4) } } ================================================ FILE: man/addAlgorithm.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Algorithm.R \name{addAlgorithm} \alias{addAlgorithm} \alias{Algorithm} \alias{removeAlgorithms} \title{Define Algorithms for Experiments} \usage{ addAlgorithm(name, fun = NULL, reg = getDefaultRegistry()) removeAlgorithms(name, reg = getDefaultRegistry()) } \arguments{ \item{name}{[\code{character(1)}]\cr Unique identifier for the algorithm.} \item{fun}{[\code{function}]\cr The algorithm function. The static problem part is passed as \dQuote{data}, the generated problem instance is passed as \dQuote{instance} and the \code{\link{Job}}/\code{\link{Experiment}} as \dQuote{job}. Therefore, your function must have the formal arguments \dQuote{job}, \dQuote{data} and \dQuote{instance} (or dots \code{...}). If you do not provide a function, it defaults to a function which just returns the instance.} \item{reg}{[\code{\link{ExperimentRegistry}}]\cr Registry. If not explicitly passed, uses the last created registry.} } \value{ [\code{Algorithm}]. Object of class \dQuote{Algorithm}. 
} \description{ Algorithms are functions which get the \code{data} part as well as the problem instance (the return value of the function defined in \code{\link{Problem}}) and return an arbitrary R object. This function serializes all components to the file system and registers the algorithm in the \code{\link{ExperimentRegistry}}. \code{removeAlgorithms} removes all jobs from the registry which depend on the specific algorithm. \code{reg$algorithms} holds the IDs of already defined algorithms. } \seealso{ \code{\link{Problem}}, \code{\link{addExperiments}} } ================================================ FILE: man/addExperiments.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/addExperiments.R \name{addExperiments} \alias{addExperiments} \title{Add Experiments to the Registry} \usage{ addExperiments( prob.designs = NULL, algo.designs = NULL, repls = 1L, combine = "crossprod", reg = getDefaultRegistry() ) } \arguments{ \item{prob.designs}{[named list of \code{\link[base]{data.frame}}]\cr Named list of data frames (or \code{\link[data.table]{data.table}}). The name must match the problem name while the column names correspond to parameters of the problem. If \code{NULL}, experiments for all defined problems without any parameters are added.} \item{algo.designs}{[named list of \code{\link[data.table]{data.table}} or \code{\link[base]{data.frame}}]\cr Named list of data frames (or \code{\link[data.table]{data.table}}). The name must match the algorithm name while the column names correspond to parameters of the algorithm.
If \code{NULL}, experiments for all defined algorithms without any parameters are added.} \item{repls}{[\code{integer()}]\cr Number of replications for each problem design in \code{prob.designs} (automatically replicated to the correct length).} \item{combine}{[\code{character(1)}]\cr How to combine the rows of a single problem design with the rows of a single algorithm design? Default is \dQuote{crossprod} which combines each row of the problem design with each row of the algorithm design in a cross-product fashion. Set to \dQuote{bind} to just \code{\link[base]{cbind}} the tables of problem and algorithm designs where the shorter table is repeated if necessary.} \item{reg}{[\code{\link{ExperimentRegistry}}]\cr Registry. If not explicitly passed, uses the last created registry.} } \value{ [\code{\link[data.table]{data.table}}] with ids of added jobs stored in column \dQuote{job.id}. } \description{ Adds experiments (parametrized combinations of problems with algorithms) to the registry and thereby defines batch jobs. If multiple problem designs or algorithm designs are provided, they are combined via the Cartesian product. E.g., if you have two problems \code{p1} and \code{p2} and three algorithms \code{a1}, \code{a2} and \code{a3}, \code{addExperiments} creates experiments for all parameters for the combinations \code{(p1, a1)}, \code{(p1, a2)}, \code{(p1, a3)}, \code{(p2, a1)}, \code{(p2, a2)} and \code{(p2, a3)}. } \note{ R's \code{data.frame} converts character vectors to factors by default in R versions prior to 4.0.0 which frequently resulted in problems using \code{addExperiments}. Therefore, this function will warn about factor variables if the following conditions hold: \enumerate{ \item R version is < 4.0.0 \item The design is passed as a \code{data.frame}, not a \code{\link[data.table]{data.table}} or \code{\link[tibble]{tibble}}. \item The option \dQuote{stringsAsFactors} is not set or set to \code{TRUE}.
} } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) # add first problem fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd) addProblem("rnorm", fun = fun, reg = tmp) # add second problem fun = function(job, data, n, lambda, ...) rexp(n, rate = lambda) addProblem("rexp", fun = fun, reg = tmp) # add first algorithm fun = function(instance, method, ...) if (method == "mean") mean(instance) else median(instance) addAlgorithm("average", fun = fun, reg = tmp) # add second algorithm fun = function(instance, ...) sd(instance) addAlgorithm("deviation", fun = fun, reg = tmp) # define problem and algorithm designs library(data.table) prob.designs = algo.designs = list() prob.designs$rnorm = CJ(n = 100, mean = -1:1, sd = 1:5) prob.designs$rexp = data.table(n = 100, lambda = 1:5) algo.designs$average = data.table(method = c("mean", "median")) algo.designs$deviation = data.table() # add experiments and submit addExperiments(prob.designs, algo.designs, reg = tmp) # check what has been created summarizeExperiments(reg = tmp) unwrap(getJobPars(reg = tmp)) } \seealso{ Other Experiment: \code{\link{removeExperiments}()}, \code{\link{summarizeExperiments}()} } \concept{Experiment} ================================================ FILE: man/addProblem.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Problem.R \name{addProblem} \alias{addProblem} \alias{Problem} \alias{removeProblems} \title{Define Problems for Experiments} \usage{ addProblem( name, data = NULL, fun = NULL, seed = NULL, cache = FALSE, reg = getDefaultRegistry() ) removeProblems(name, reg = getDefaultRegistry()) } \arguments{ \item{name}{[\code{character(1)}]\cr Unique identifier for the problem.} \item{data}{[\code{ANY}]\cr Static problem part. 
Default is \code{NULL}.} \item{fun}{[\code{function}]\cr The function defining the stochastic problem part. The static part is passed to this function with name \dQuote{data} and the \code{\link{Job}}/\code{\link{Experiment}} is passed as \dQuote{job}. Therefore, your function must have the formal arguments \dQuote{job} and \dQuote{data} (or dots \code{...}). If you do not provide a function, it defaults to a function which just returns the data part.} \item{seed}{[\code{integer(1)}]\cr Start seed for this problem. This allows the \dQuote{synchronization} of a stochastic problem across algorithms, so that different algorithms are evaluated on the same stochastic instance. If the problem seed is defined, the seeding mechanism works as follows: (1) Before the dynamic part of a problem is instantiated, the seed of the problem + [replication number] - 1 is set, i.e. the first replication uses the problem seed. (2) The stochastic part of the problem is instantiated. (3) From now on the usual experiment seed of the registry is used, see \code{\link{ExperimentRegistry}}. If \code{seed} is set to \code{NULL} (default), the job seed is used to instantiate the problem and different algorithms see different stochastic instances of the same problem.} \item{cache}{[\code{logical(1)}]\cr If \code{TRUE} and \code{seed} is set, problem instances will be cached on the file system. This assumes that each problem instance is deterministic for each combination of hyperparameter setting and each replication number. This feature is experimental.} \item{reg}{[\code{\link{ExperimentRegistry}}]\cr Registry. If not explicitly passed, uses the last created registry.} } \value{ [\code{Problem}]. Object of class \dQuote{Problem} (invisibly). } \description{ Problems may consist of up to two parts: A static, immutable part (\code{data} in \code{addProblem}) and a dynamic, stochastic part (\code{fun} in \code{addProblem}). 
For example, for statistical learning problems a data frame would be the static problem part while a resampling function would be the stochastic part which creates the problem instance. This instance is then typically passed to a learning algorithm like a wrapper around a statistical model (\code{fun} in \code{\link{addAlgorithm}}). This function serializes all components to the file system and registers the problem in the \code{\link{ExperimentRegistry}}. \code{removeProblems} removes all jobs from the registry which depend on the specific problem. \code{reg$problems} holds the IDs of already defined problems. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) addProblem("p1", fun = function(job, data) data, reg = tmp) addProblem("p2", fun = function(job, data) job, reg = tmp) addAlgorithm("a1", fun = function(job, data, instance) instance, reg = tmp) addExperiments(repls = 2, reg = tmp) # List problems, algorithms and job parameters: tmp$problems tmp$algorithms getJobPars(reg = tmp) # Remove one problem removeProblems("p1", reg = tmp) # List problems and algorithms: tmp$problems tmp$algorithms getJobPars(reg = tmp) } \seealso{ \code{\link{Algorithm}}, \code{\link{addExperiments}} } ================================================ FILE: man/assertRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Registry.R \name{assertRegistry} \alias{assertRegistry} \title{assertRegistry} \usage{ assertRegistry( reg, class = NULL, writeable = FALSE, sync = FALSE, running.ok = TRUE ) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr The object asserted to be a \code{Registry}.} \item{class}{[\code{character(1)}]\cr If \code{NULL} (default), \code{reg} must only inherit from class \dQuote{Registry}. Otherwise check that \code{reg} is of class \code{class}.
E.g., if set to \dQuote{Registry}, a \code{\link{ExperimentRegistry}} would not pass.} \item{writeable}{[\code{logical(1)}]\cr Check if the registry is writeable.} \item{sync}{[\code{logical(1)}]\cr Try to synchronize the registry by including pending results from the file system. See \code{\link{syncRegistry}}.} \item{running.ok}{[\code{logical(1)}]\cr If \code{FALSE} throw an error if jobs associated with the registry are currently running.} } \value{ \code{TRUE} invisibly. } \description{ Assert that a given object is a \code{batchtools} registry. Additionally can sync the registry, check if it is writeable, or check if jobs are running. If any check fails, throws an error indicating the reason for the failure. } ================================================ FILE: man/batchExport.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Export.R \name{batchExport} \alias{batchExport} \title{Export Objects to the Slaves} \usage{ batchExport( export = list(), unexport = character(0L), reg = getDefaultRegistry() ) } \arguments{ \item{export}{[\code{list}]\cr Named list of objects to export.} \item{unexport}{[\code{character}]\cr Vector of object names to unexport.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{data.table}] with name and uri to the exported objects. } \description{ Objects are saved in subdirectory \dQuote{exports} of the \dQuote{file.dir} of \code{reg}. They are automatically loaded and placed in the global environment each time the registry is loaded or a job collection is executed.
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) # list exports exports = batchExport(reg = tmp) print(exports) # add a job and required exports batchMap(function(x) x^2 + y + z, x = 1:3, reg = tmp) exports = batchExport(export = list(y = 99, z = 1), reg = tmp) print(exports) submitJobs(reg = tmp) waitForJobs(reg = tmp) stopifnot(loadResult(1, reg = tmp) == 101) # Un-export z exports = batchExport(unexport = "z", reg = tmp) print(exports) } ================================================ FILE: man/batchMap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/batchMap.R \name{batchMap} \alias{batchMap} \title{Map Operation for Batch Systems} \usage{ batchMap( fun, ..., args = list(), more.args = list(), reg = getDefaultRegistry() ) } \arguments{ \item{fun}{[\code{function}]\cr Function to map over arguments provided via \code{...}. Parameters given via \code{args} or \code{...} are passed as-is, in the respective order and possibly named. If the function has the named formal argument \dQuote{.job}, the \code{\link{Job}} is passed to the function on the slave.} \item{...}{[ANY]\cr Arguments to vectorize over (list or vector). Shorter vectors will be recycled (possibly with a warning if any length is not a multiple of the longest length). Mutually exclusive with \code{args}. Note that although it is possible to iterate over large objects (e.g., lists of data frames or matrices), this usually hurts the overall performance and thus is discouraged.} \item{args}{[\code{list} | \code{data.frame}]\cr Arguments to vectorize over as (named) list or data frame. Shorter vectors will be recycled (possibly with a warning if any length is not a multiple of the longest length). Mutually exclusive with \code{...}.} \item{more.args}{[\code{list}]\cr A list of further arguments passed to \code{fun}.
Default is an empty list.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with ids of added jobs stored in column \dQuote{job.id}. } \description{ A parallel and asynchronous \code{\link[base]{Map}}/\code{\link[base]{mapply}} for batch systems. Note that this function only defines the computational jobs. The actual computation is started with \code{\link{submitJobs}}. Results and partial results can be collected with \code{\link{reduceResultsList}}, \code{\link{reduceResults}} or \code{\link{loadResult}}. For a synchronous \code{\link[base]{Map}}-like execution, see \code{\link{btmapply}}. } \examples{ \dontshow{ batchtools:::example_push_temp(3) } # example using "..." and more.args tmp = makeRegistry(file.dir = NA, make.default = FALSE) f = function(x, y) x^2 + y ids = batchMap(f, x = 1:10, more.args = list(y = 100), reg = tmp) getJobPars(reg = tmp) testJob(6, reg = tmp) # 100 + 6^2 = 136 # vector recycling tmp = makeRegistry(file.dir = NA, make.default = FALSE) f = function(...) list(...) 
ids = batchMap(f, x = 1:3, y = 1:6, reg = tmp) getJobPars(reg = tmp) # example for an expand.grid()-like operation on parameters tmp = makeRegistry(file.dir = NA, make.default = FALSE) ids = batchMap(paste, args = data.table::CJ(x = letters[1:3], y = 1:3), reg = tmp) getJobPars(reg = tmp) testJob(6, reg = tmp) } \seealso{ \code{\link{batchReduce}} } ================================================ FILE: man/batchMapResults.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/batchMapResults.R \name{batchMapResults} \alias{batchMapResults} \title{Map Over Results to Create New Jobs} \usage{ batchMapResults( fun, ids = NULL, ..., more.args = list(), target, source = getDefaultRegistry() ) } \arguments{ \item{fun}{[\code{function}]\cr Function which takes the result as first (unnamed) argument.} \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findDone}}. Invalid ids are ignored.} \item{...}{[ANY]\cr Arguments to vectorize over (list or vector). Passed to \code{\link{batchMap}}.} \item{more.args}{[\code{list}]\cr A list of further arguments passed to \code{fun}. Default is an empty list.} \item{target}{[\code{\link{Registry}}]\cr Empty Registry where new jobs are created for.} \item{source}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with ids of jobs added to \code{target}. } \description{ This function allows you to create new computational jobs (just like \code{\link{batchMap}}) based on the results of a \code{\link{Registry}}.
} \note{ The URI to the result files in registry \code{source} is hard coded as parameter in the \code{target} registry. This means that \code{target} is currently not portable between systems for computation. } \examples{ \dontshow{ batchtools:::example_push_temp(2) } # Source registry: calculate square of some numbers tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(function(x) list(square = x^2), x = 1:10, reg = tmp) submitJobs(reg = tmp) waitForJobs(reg = tmp) # Target registry: calculate the square root on results of first registry target = makeRegistry(file.dir = NA, make.default = FALSE) batchMapResults(fun = function(x, y) list(sqrt = sqrt(x$square)), ids = 4:8, target = target, source = tmp) submitJobs(reg = target) waitForJobs(reg = target) # Map old to new ids. First, get a table with results and parameters results = unwrap(rjoin(getJobPars(reg = target), reduceResultsDataTable(reg = target))) print(results) # Parameter '.id' points to job.id in 'source'. Use an inner join to combine: ijoin(results, unwrap(reduceResultsDataTable(reg = tmp)), by = c(".id" = "job.id")) } \seealso{ Other Results: \code{\link{loadResult}()}, \code{\link{reduceResults}()}, \code{\link{reduceResultsList}()} } \concept{Results} ================================================ FILE: man/batchReduce.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/batchReduce.R \name{batchReduce} \alias{batchReduce} \title{Reduce Operation for Batch Systems} \usage{ batchReduce( fun, xs, init = NULL, chunks = seq_along(xs), more.args = list(), reg = getDefaultRegistry() ) } \arguments{ \item{fun}{[\code{function(aggr, x, ...)}]\cr Function to reduce \code{xs} with.} \item{xs}{[\code{vector}]\cr Vector to reduce.} \item{init}{[ANY]\cr Initial object for reducing. See \code{\link[base]{Reduce}}.} \item{chunks}{[\code{integer(length(xs))}]\cr Group for each element of \code{xs}.
Can be generated with \code{\link{chunk}}.} \item{more.args}{[\code{list}]\cr A list of additional arguments passed to \code{fun}.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with ids of added jobs stored in column \dQuote{job.id}. } \description{ A parallel and asynchronous \code{\link[base]{Reduce}} for batch systems. Note that this function only defines the computational jobs. Each job reduces a certain number of elements on one slave. The actual computation is started with \code{\link{submitJobs}}. Results and partial results can be collected with \code{\link{reduceResultsList}}, \code{\link{reduceResults}} or \code{\link{loadResult}}. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } # define function to reduce on slave, we want to sum a vector tmp = makeRegistry(file.dir = NA, make.default = FALSE) xs = 1:100 f = function(aggr, x) aggr + x # sum 5 numbers on each slave process, i.e. 20 jobs chunks = chunk(xs, chunk.size = 5) batchReduce(fun = f, 1:100, init = 0, chunks = chunks, reg = tmp) submitJobs(reg = tmp) waitForJobs(reg = tmp) # now reduce one final time on master reduceResults(fun = function(aggr, job, res) f(aggr, res), reg = tmp) } \seealso{ \code{\link{batchMap}} } ================================================ FILE: man/batchtools-package.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/zzz.R \docType{package} \name{batchtools-package} \alias{batchtools} \alias{batchtools-package} \title{batchtools: Tools for Computation on Batch Systems} \description{ For bug reports and feature requests please use the tracker: \url{https://github.com/mlr-org/batchtools}. } \section{Package options}{ \describe{ \item{\code{batchtools.verbose}}{ Verbosity. Set to \code{FALSE} to suppress info messages and progress bars.
} \item{\code{batchtools.progress}}{ Progress bars. Set to \code{FALSE} to disable them. } \item{\code{batchtools.timestamps}}{ Add time stamps to log output. Set to \code{FALSE} to disable them. } } Furthermore, you may enable a debug mode using the \pkg{debugme} package by setting the environment variable \dQuote{DEBUGME} to \dQuote{batchtools} before loading \pkg{batchtools}. } \seealso{ Useful links: \itemize{ \item \url{https://github.com/mlr-org/batchtools} \item \url{https://batchtools.mlr-org.com} \item Report bugs at \url{https://github.com/mlr-org/batchtools/issues} } } \author{ \strong{Maintainer}: Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID}) Authors: \itemize{ \item Bernd Bischl \email{bernd_bischl@gmx.net} } Other contributors: \itemize{ \item Dirk Surmann \email{surmann@statistik.tu-dortmund.de} (\href{https://orcid.org/0000-0003-0873-137X}{ORCID}) [contributor] } } ================================================ FILE: man/btlapply.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/btlapply.R \name{btlapply} \alias{btlapply} \alias{btmapply} \title{Synchronous Apply Functions} \usage{ btlapply( X, fun, ..., resources = list(), n.chunks = NULL, chunk.size = NULL, reg = makeRegistry(file.dir = NA) ) btmapply( fun, ..., more.args = list(), simplify = FALSE, use.names = TRUE, resources = list(), n.chunks = NULL, chunk.size = NULL, reg = makeRegistry(file.dir = NA) ) } \arguments{ \item{X}{[\code{\link[base]{vector}}]\cr Vector to apply over.} \item{fun}{[\code{function}]\cr Function to apply.} \item{...}{[\code{ANY}]\cr Additional arguments passed to \code{fun} (\code{btlapply}) or vectors to map over (\code{btmapply}).} \item{resources}{[\code{named list}]\cr Computational resources for the jobs to submit. The actual elements of this list (e.g. 
something like \dQuote{walltime} or \dQuote{nodes}) depend on your template file, exceptions are outlined in the section 'Resources'. Default settings for a system can be set in the configuration file by defining the named list \code{default.resources}. Note that these settings are merged by name, e.g. merging \code{list(walltime = 300)} into \code{list(walltime = 400, memory = 512)} will result in \code{list(walltime = 300, memory = 512)}. Same holds for individual job resources passed as additional column of \code{ids} (c.f. section 'Resources').} \item{n.chunks}{[\code{integer(1)}]\cr Passed to \code{\link{chunk}} before \code{\link{submitJobs}}.} \item{chunk.size}{[\code{integer(1)}]\cr Passed to \code{\link{chunk}} before \code{\link{submitJobs}}.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} \item{more.args}{[\code{list}]\cr Additional arguments passed to \code{fun}.} \item{simplify}{[\code{logical(1)}]\cr Simplify the results using \code{\link[base]{simplify2array}}?} \item{use.names}{[\code{logical(1)}]\cr Use names of the input to name the output?} } \value{ [\code{list}] List with the results of the function call. } \description{ This is a set of functions acting as counterparts to the sequential popular apply functions in base R: \code{btlapply} for \code{\link[base]{lapply}} and \code{btmapply} for \code{\link[base]{mapply}}. Internally, jobs are created using \code{\link{batchMap}} on the provided registry. If no registry is provided, a temporary registry (see argument \code{file.dir} of \code{\link{makeRegistry}}) and \code{\link{batchMap}} will be used. After all jobs are terminated (see \code{\link{waitForJobs}}), the results are collected and returned as a list. Note that these functions are only suitable for short and fail-safe operations on batch system. If some jobs fail, you have to retrieve partial results from the registry directory yourself. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } btlapply(1:3, function(x) x^2) btmapply(function(x, y, z) x + y + z, x = 1:3, y = 1:3, more.args = list(z = 1), simplify = TRUE) } ================================================ FILE: man/cfBrewTemplate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{cfBrewTemplate} \alias{cfBrewTemplate} \title{Cluster Functions Helper to Write Job Description Files} \usage{ cfBrewTemplate(reg, text, jc) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} \item{text}{[\code{character(1)}]\cr String ready to be brewed. See \code{\link{cfReadBrewTemplate}} to read a template from the file system.} \item{jc}{[\code{\link{JobCollection}}]\cr Will be used as environment to brew the template file in. See \code{\link{JobCollection}} for a list of all available variables.} } \value{ [\code{character(1)}]. File path to brewed template file. } \description{ This function is only intended for use in your own cluster functions implementation. Calls brew silently on your template, any error will lead to an exception. The file is stored at the same place as the corresponding job file in the \dQuote{jobs}-subdir of your files directory.
} \seealso{ Other ClusterFunctionsHelper: \code{\link{cfHandleUnknownSubmitError}()}, \code{\link{cfKillJob}()}, \code{\link{cfReadBrewTemplate}()}, \code{\link{makeClusterFunctions}()}, \code{\link{makeSubmitJobResult}()}, \code{\link{runOSCommand}()} } \concept{ClusterFunctionsHelper} ================================================ FILE: man/cfHandleUnknownSubmitError.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{cfHandleUnknownSubmitError} \alias{cfHandleUnknownSubmitError} \title{Cluster Functions Helper to Handle Unknown Errors} \usage{ cfHandleUnknownSubmitError(cmd, exit.code, output) } \arguments{ \item{cmd}{[\code{character(1)}]\cr OS command used to submit the job, e.g. qsub.} \item{exit.code}{[\code{integer(1)}]\cr Exit code of the OS command, should not be 0.} \item{output}{[\code{character}]\cr Output of the OS command, hopefully an informative error message. If these are multiple lines in a vector, they are automatically joined.} } \value{ [\code{\link{SubmitJobResult}}]. } \description{ This function is only intended for use in your own cluster functions implementation. Simply constructs a \code{\link{SubmitJobResult}} object with status code 101, NA as batch id and an informative error message containing the output of the OS command in \code{output}. 
} \seealso{ Other ClusterFunctionsHelper: \code{\link{cfBrewTemplate}()}, \code{\link{cfKillJob}()}, \code{\link{cfReadBrewTemplate}()}, \code{\link{makeClusterFunctions}()}, \code{\link{makeSubmitJobResult}()}, \code{\link{runOSCommand}()} } \concept{ClusterFunctionsHelper} ================================================ FILE: man/cfKillJob.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{cfKillJob} \alias{cfKillJob} \title{Cluster Functions Helper to Kill Batch Jobs} \usage{ cfKillJob( reg, cmd, args = character(0L), max.tries = 3L, nodename = "localhost" ) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} \item{cmd}{[\code{character(1)}]\cr OS command, e.g. \dQuote{qdel}.} \item{args}{[\code{character}]\cr Arguments to \code{cmd}, including the batch id.} \item{max.tries}{[\code{integer(1)}]\cr Total number of times to try to execute the OS command in case of failure. Default is \code{3}.} \item{nodename}{[\code{character(1)}]\cr Name of the SSH node to run the command on. If set to \dQuote{localhost} (default), the command is not piped through SSH.} } \value{ \code{TRUE} on success. An exception is raised otherwise. } \description{ This function is only intended for use in your own cluster functions implementation. Calls the OS command to kill a job via \code{\link[base]{system}} like this: \dQuote{cmd batch.job.id}. If the command returns an exit code > 0, the command is repeated after a 1 second sleep \code{max.tries-1} times. If the command failed in all tries, an error is generated. 
} \seealso{ Other ClusterFunctionsHelper: \code{\link{cfBrewTemplate}()}, \code{\link{cfHandleUnknownSubmitError}()}, \code{\link{cfReadBrewTemplate}()}, \code{\link{makeClusterFunctions}()}, \code{\link{makeSubmitJobResult}()}, \code{\link{runOSCommand}()} } \concept{ClusterFunctionsHelper} ================================================ FILE: man/cfReadBrewTemplate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{cfReadBrewTemplate} \alias{cfReadBrewTemplate} \title{Cluster Functions Helper to Parse a Brew Template} \usage{ cfReadBrewTemplate(template, comment.string = NA_character_) } \arguments{ \item{template}{[\code{character(1)}]\cr Path to template file which is then passed to \code{\link[brew]{brew}}.} \item{comment.string}{[\code{character(1)}]\cr Ignore lines starting with this string.} } \value{ [\code{character}]. } \description{ This function is only intended for use in your own cluster functions implementation. Simply reads your template file and returns it as a character vector. 
} \seealso{ Other ClusterFunctionsHelper: \code{\link{cfBrewTemplate}()}, \code{\link{cfHandleUnknownSubmitError}()}, \code{\link{cfKillJob}()}, \code{\link{makeClusterFunctions}()}, \code{\link{makeSubmitJobResult}()}, \code{\link{runOSCommand}()} } \concept{ClusterFunctionsHelper} ================================================ FILE: man/chunk.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/chunkIds.R \name{chunk} \alias{chunk} \alias{lpt} \alias{binpack} \title{Chunk Jobs for Sequential Execution} \usage{ chunk(x, n.chunks = NULL, chunk.size = NULL, shuffle = TRUE) lpt(x, n.chunks = 1L) binpack(x, chunk.size = max(x)) } \arguments{ \item{x}{[\code{numeric}]\cr For \code{chunk} an atomic vector (usually the \code{job.id}). For \code{binpack} and \code{lpt}, the weights to group.} \item{n.chunks}{[\code{integer(1)}]\cr Requested number of chunks. The function \code{chunk} distributes the number of elements in \code{x} evenly while \code{lpt} tries to even out the sum of elements in each chunk. If more chunks than necessary are requested, empty chunks are ignored. Mutually exclusive with \code{chunk.size}.} \item{chunk.size}{[\code{integer(1)}]\cr Requested chunk size for each single chunk. For \code{chunk} this is the number of elements in \code{x}, for \code{binpack} the size is determined by the sum of values in \code{x}. Mutually exclusive with \code{n.chunks}.} \item{shuffle}{[\code{logical(1)}]\cr Shuffles the groups. Default is \code{TRUE}.} } \value{ [\code{integer}] giving the chunk number for each element of \code{x}. } \description{ Jobs can be partitioned into \dQuote{chunks} to be executed sequentially on the computational nodes. Chunks are defined by providing a data frame with columns \dQuote{job.id} and \dQuote{chunk} (integer) to \code{\link{submitJobs}}. All jobs with the same chunk number will be grouped together on one node to form a single computational job. 
The function \code{chunk} simply splits \code{x} into either a fixed number of groups, or into a variable number of groups with a fixed number of maximum elements. The function \code{lpt} also groups \code{x} into a fixed number of chunks, but uses the actual values of \code{x} in a greedy \dQuote{Longest Processing Time} algorithm. As a result, the maximum sum of elements is minimized. \code{binpack} splits \code{x} into a variable number of groups whose sum of elements does not exceed the upper limit provided by \code{chunk.size}. See examples of \code{\link{estimateRuntimes}} for an application of \code{binpack} and \code{lpt}. } \examples{ \dontshow{ batchtools:::example_push_temp(2) } ch = chunk(1:10, n.chunks = 2) table(ch) ch = chunk(rep(1, 10), chunk.size = 2) table(ch) set.seed(1) x = runif(10) ch = lpt(x, n.chunks = 2) sapply(split(x, ch), sum) set.seed(1) x = runif(10) ch = binpack(x, 1) sapply(split(x, ch), sum) # Job chunking tmp = makeRegistry(file.dir = NA, make.default = FALSE) ids = batchMap(identity, 1:25, reg = tmp) ### Group into chunks with 10 jobs each library(data.table) ids[, chunk := chunk(job.id, chunk.size = 10)] print(ids[, .N, by = chunk]) ### Group into 4 chunks ids[, chunk := chunk(job.id, n.chunks = 4)] print(ids[, .N, by = chunk]) ### Submit to batch system submitJobs(ids = ids, reg = tmp) # Grouped chunking tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) prob = addProblem(reg = tmp, "prob1", data = iris, fun = function(job, data) nrow(data)) prob = addProblem(reg = tmp, "prob2", data = Titanic, fun = function(job, data) nrow(data)) algo = addAlgorithm(reg = tmp, "algo", fun = function(job, data, instance, i, ...) 
problem) prob.designs = list(prob1 = data.table(), prob2 = data.table(x = 1:2)) algo.designs = list(algo = data.table(i = 1:3)) addExperiments(prob.designs, algo.designs, repls = 3, reg = tmp) ### Group into chunks of 5 jobs, but do not put multiple problems into the same chunk # -> only one problem has to be loaded per chunk, and only once because it is cached ids = getJobTable(reg = tmp)[, .(job.id, problem, algorithm)] ids[, chunk := chunk(job.id, chunk.size = 5), by = "problem"] ids[, chunk := .GRP, by = c("problem", "chunk")] dcast(ids, chunk ~ problem) } \seealso{ \code{\link{estimateRuntimes}} } ================================================ FILE: man/clearRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clearRegistry.R \name{clearRegistry} \alias{clearRegistry} \title{Remove All Jobs} \usage{ clearRegistry(reg = getDefaultRegistry()) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \description{ Removes all jobs from a registry and calls \code{\link{sweepRegistry}}. 
} \seealso{ Other Registry: \code{\link{getDefaultRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{sweepRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/doJobCollection.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/doJobCollection.R \name{doJobCollection} \alias{doJobCollection} \title{Execute Jobs of a JobCollection} \usage{ doJobCollection(jc, output = NULL) } \arguments{ \item{jc}{[\code{\link{JobCollection}}]\cr Either an object of class \dQuote{JobCollection} as returned by \code{\link{makeJobCollection}} or a string with the path to file containing a \dQuote{JobCollection} as RDS file (as stored by \code{\link{submitJobs}}).} \item{output}{[\code{character(1)}]\cr Path to a file to write the output to. Defaults to \code{NULL} which means that output is written to the active \code{\link[base]{sink}}. Do not set this if your scheduler redirects output to a log file.} } \value{ [\code{character(1)}]: Hash of the \code{\link{JobCollection}} executed. } \description{ Executes every job in a \code{\link{JobCollection}}. This function is intended to be called on the slave. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(identity, 1:2, reg = tmp) jc = makeJobCollection(1:2, reg = tmp) doJobCollection(jc) } \seealso{ Other JobCollection: \code{\link{makeJobCollection}()} } \concept{JobCollection} ================================================ FILE: man/estimateRuntimes.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/estimateRuntimes.R \name{estimateRuntimes} \alias{estimateRuntimes} \alias{print.RuntimeEstimate} \title{Estimate Remaining Runtimes} \usage{ estimateRuntimes(tab, ..., reg = getDefaultRegistry()) \method{print}{RuntimeEstimate}(x, n = 1L, ...) } \arguments{ \item{tab}{[\code{\link[data.table]{data.table}}]\cr Table with column \dQuote{job.id} and additional columns to predict the runtime. Observed runtimes will be looked up in the registry and serve as dependent variable. All columns in \code{tab} except \dQuote{job.id} will be passed to \code{\link[ranger]{ranger}} as independent variables to fit the model.} \item{...}{[ANY]\cr Additional parameters passed to \code{\link[ranger]{ranger}}. Ignored for the \code{print} method.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} \item{x}{[\code{RuntimeEstimate}]\cr Object to print.} \item{n}{[\code{integer(1)}]\cr Number of parallel jobs to assume for runtime estimation.} } \value{ [\code{RuntimeEstimate}] which is a \code{list} with two named elements: \dQuote{runtimes} is a \code{\link[data.table]{data.table}} with columns \dQuote{job.id}, \dQuote{runtime} (in seconds) and \dQuote{type} (\dQuote{estimated} if runtime is estimated, \dQuote{observed} if runtime was observed). The other element of the list named \dQuote{model} contains the fitted random forest object. 
} \description{ Estimates the runtimes of jobs using the random forest implemented in \pkg{ranger}. Observed runtimes are retrieved from the \code{\link{Registry}} and runtimes are predicted for unfinished jobs. The estimated remaining time is calculated in the \code{print} method. You may also pass \code{n} here to determine the number of parallel jobs which is then used in a simple Longest Processing Time (LPT) algorithm to give an estimate for the parallel runtime. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } # Create a simple toy registry set.seed(1) tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE, seed = 1) addProblem(name = "iris", data = iris, fun = function(data, ...) nrow(data), reg = tmp) addAlgorithm(name = "nrow", function(instance, ...) nrow(instance), reg = tmp) addAlgorithm(name = "ncol", function(instance, ...) ncol(instance), reg = tmp) addExperiments(algo.designs = list(nrow = data.table::CJ(x = 1:50, y = letters[1:5])), reg = tmp) addExperiments(algo.designs = list(ncol = data.table::CJ(x = 1:50, y = letters[1:5])), reg = tmp) # We use the job parameters to predict runtimes tab = unwrap(getJobPars(reg = tmp)) # First we need to submit some jobs so that the forest can train on some data. # Thus, we just sample some jobs from the registry while grouping by factor variables. 
library(data.table) ids = tab[, .SD[sample(nrow(.SD), 5)], by = c("problem", "algorithm", "y")] setkeyv(ids, "job.id") submitJobs(ids, reg = tmp) waitForJobs(reg = tmp) # We "simulate" some more realistic runtimes here to demonstrate the functionality: # - Algorithm "ncol" is 5 times more expensive than "nrow" # - x has no effect on the runtime # - If y is "a" or "b", the runtimes are really high runtime = function(algorithm, x, y) { ifelse(algorithm == "nrow", 100L, 500L) + 1000L * (y \%in\% letters[1:2]) } tmp$status[ids, done := done + tab[ids, runtime(algorithm, x, y)]] rjoin(sjoin(tab, ids), getJobStatus(ids, reg = tmp)[, c("job.id", "time.running")]) # Estimate runtimes: est = estimateRuntimes(tab, reg = tmp) print(est) rjoin(tab, est$runtimes) print(est, n = 10) # Submit jobs with longest runtime first: ids = est$runtimes[type == "estimated"][order(runtime, decreasing = TRUE)] print(ids) \dontrun{ submitJobs(ids, reg = tmp) } # Group jobs into chunks with runtime < 1h ids = est$runtimes[type == "estimated"] ids[, chunk := binpack(runtime, 3600)] print(ids) print(ids[, list(runtime = sum(runtime)), by = chunk]) \dontrun{ submitJobs(ids, reg = tmp) } # Group jobs into 10 chunks with similar runtime ids = est$runtimes[type == "estimated"] ids[, chunk := lpt(runtime, 10)] print(ids[, list(runtime = sum(runtime)), by = chunk]) } \seealso{ \code{\link{binpack}} and \code{\link{lpt}} to chunk jobs according to their estimated runtimes. } ================================================ FILE: man/execJob.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/execJob.R \name{execJob} \alias{execJob} \title{Execute a Single Job} \usage{ execJob(job) } \arguments{ \item{job}{[\code{\link{Job}} | \code{\link{Experiment}}]\cr Job/Experiment to execute.} } \value{ Result of the job. } \description{ Executes a single job (as created by \code{\link{makeJob}}) and returns its result. 
Also works for Experiments. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(identity, 1:2, reg = tmp) job = makeJob(1, reg = tmp) execJob(job) } ================================================ FILE: man/findConfFile.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/config.R \name{findConfFile} \alias{findConfFile} \title{Find a batchtools Configuration File} \usage{ findConfFile() } \value{ [\code{character(1)}] Path to the configuration file or \code{NA} if no configuration file was found. } \description{ This function returns the path to the first configuration file found in the following locations: \enumerate{ \item{File \dQuote{batchtools.conf.R} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}.} \item{File \dQuote{batchtools.conf.R} in the current working directory.} \item{File \dQuote{config.R} in the user configuration directory as reported by \code{rappdirs::user_config_dir("batchtools", expand = FALSE)} (depending on OS, e.g., on linux this usually resolves to \dQuote{~/.config/batchtools/config.R}).} \item{\dQuote{.batchtools.conf.R} in the home directory (\dQuote{~}).} \item{\dQuote{config.R} in the site config directory as reported by \code{rappdirs::site_config_dir("batchtools")} (depending on OS). 
This file can be used for admins to set sane defaults for a computation site.} } } \keyword{internal} ================================================ FILE: man/findJobs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/findJobs.R \name{findJobs} \alias{findJobs} \alias{findExperiments} \alias{findSubmitted} \alias{findNotSubmitted} \alias{findStarted} \alias{findNotStarted} \alias{findDone} \alias{findNotDone} \alias{findErrors} \alias{findOnSystem} \alias{findRunning} \alias{findQueued} \alias{findExpired} \alias{findTagged} \title{Find and Filter Jobs} \usage{ findJobs(expr, ids = NULL, reg = getDefaultRegistry()) findExperiments( ids = NULL, prob.name = NA_character_, prob.pattern = NA_character_, algo.name = NA_character_, algo.pattern = NA_character_, prob.pars, algo.pars, repls = NULL, reg = getDefaultRegistry() ) findSubmitted(ids = NULL, reg = getDefaultRegistry()) findNotSubmitted(ids = NULL, reg = getDefaultRegistry()) findStarted(ids = NULL, reg = getDefaultRegistry()) findNotStarted(ids = NULL, reg = getDefaultRegistry()) findDone(ids = NULL, reg = getDefaultRegistry()) findNotDone(ids = NULL, reg = getDefaultRegistry()) findErrors(ids = NULL, reg = getDefaultRegistry()) findOnSystem(ids = NULL, reg = getDefaultRegistry()) findRunning(ids = NULL, reg = getDefaultRegistry()) findQueued(ids = NULL, reg = getDefaultRegistry()) findExpired(ids = NULL, reg = getDefaultRegistry()) findTagged(tags = character(0L), ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{expr}{[\code{expression}]\cr Predicate expression evaluated in the job parameters. Jobs for which \code{expr} evaluates to \code{TRUE} are returned.} \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. 
If not set, defaults to all jobs. Invalid ids are ignored.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} \item{prob.name}{[\code{character}]\cr Exact name of the problem (no substring matching). If not provided, all problems are matched.} \item{prob.pattern}{[\code{character}]\cr Regular expression pattern to match problem names. If not provided, all problems are matched.} \item{algo.name}{[\code{character}]\cr Exact name of the algorithm (no substring matching). If not provided, all algorithms are matched.} \item{algo.pattern}{[\code{character}]\cr Regular expression pattern to match algorithm names. If not provided, all algorithms are matched.} \item{prob.pars}{[\code{expression}]\cr Predicate expression evaluated in the problem parameters.} \item{algo.pars}{[\code{expression}]\cr Predicate expression evaluated in the algorithm parameters.} \item{repls}{[\code{integer}]\cr Whitelist of replication numbers. If not provided, all replications are matched.} \item{tags}{[\code{character}]\cr Return jobs which are tagged with any of the tags provided.} } \value{ [\code{\link[data.table]{data.table}}] with column \dQuote{job.id} containing matched jobs. } \description{ These functions are used to find and filter jobs, depending on either their parameters (\code{findJobs} and \code{findExperiments}), their tags (\code{findTagged}), or their computational status (all other functions, see \code{\link{getStatus}} for an overview). Note that \code{findQueued}, \code{findRunning}, \code{findOnSystem} and \code{findExpired} are somewhat heuristic and may report misleading results, depending on the state of the system and the \code{\link{ClusterFunctions}} implementation. See \code{\link{JoinTables}} for convenient set operations (unions, intersects, differences) on tables with job ids. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(identity, i = 1:3, reg = tmp) ids = findNotSubmitted(reg = tmp) # get all jobs: findJobs(reg = tmp) # filter for jobs with parameter i >= 2 findJobs(i >= 2, reg = tmp) # filter on the computational status findSubmitted(reg = tmp) findNotDone(reg = tmp) # filter on tags addJobTags(2:3, "my_tag", reg = tmp) findTagged(tags = "my_tag", reg = tmp) # combine filter functions using joins # -> jobs which are not done and not tagged (using an anti-join): ajoin(findNotDone(reg = tmp), findTagged("my_tag", reg = tmp)) } \seealso{ \code{\link{getStatus}} \code{\link{JoinTables}} } ================================================ FILE: man/findTemplateFile.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{findTemplateFile} \alias{findTemplateFile} \title{Find a batchtools Template File} \usage{ findTemplateFile(template) } \arguments{ \item{template}{[\code{character(1)}]\cr Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: \enumerate{ \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. \item \dQuote{batchtools.[template].tmpl} in the current working directory. \item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. \item \dQuote{.batchtools.[template].tmpl} in the home directory. \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}. }} } \value{ [\code{character}] Path to the file or \code{NA} if no template file was found. 
} \description{ This function returns the path to a template file on the file system. } \keyword{internal} ================================================ FILE: man/getDefaultRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/getDefaultRegistry.R \name{getDefaultRegistry} \alias{getDefaultRegistry} \alias{setDefaultRegistry} \title{Get and Set the Default Registry} \usage{ getDefaultRegistry() setDefaultRegistry(reg) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \description{ \code{getDefaultRegistry} returns the registry currently set as default (or stops with an exception if none is set). \code{setDefaultRegistry} sets a registry as default. } \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{sweepRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/getErrorMessages.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/getErrorMessages.R \name{getErrorMessages} \alias{getErrorMessages} \title{Retrieve Error Messages} \usage{ getErrorMessages( ids = NULL, missing.as.error = FALSE, reg = getDefaultRegistry() ) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findErrors}}. Invalid ids are ignored.} \item{missing.as.error}{[\code{logical(1)}]\cr Treat missing results as errors? 
If \code{TRUE}, the error message \dQuote{[not terminated]} is imputed for jobs which have not terminated. Default is \code{FALSE}} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id}, \dQuote{terminated} (logical), \dQuote{error} (logical) and \dQuote{message} (string). } \description{ Extracts error messages from the internal data base and returns them in a table. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) fun = function(i) if (i == 3) stop(i) else i ids = batchMap(fun, i = 1:5, reg = tmp) submitJobs(1:4, reg = tmp) waitForJobs(1:4, reg = tmp) getErrorMessages(ids, reg = tmp) getErrorMessages(ids, missing.as.error = TRUE, reg = tmp) } \seealso{ Other debug: \code{\link{getStatus}()}, \code{\link{grepLogs}()}, \code{\link{killJobs}()}, \code{\link{resetJobs}()}, \code{\link{showLog}()}, \code{\link{testJob}()} } \concept{debug} ================================================ FILE: man/getJobTable.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/JobTables.R \name{getJobTable} \alias{getJobTable} \alias{getJobStatus} \alias{getJobResources} \alias{getJobPars} \alias{getJobTags} \title{Query Job Information} \usage{ getJobTable(ids = NULL, reg = getDefaultRegistry()) getJobStatus(ids = NULL, reg = getDefaultRegistry()) getJobResources(ids = NULL, reg = getDefaultRegistry()) getJobPars(ids = NULL, reg = getDefaultRegistry()) getJobTags(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. 
If not set, defaults to all jobs. Invalid ids are ignored.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with the following columns (not necessarily in this order): \describe{ \item{job.id}{Unique Job ID as integer.} \item{submitted}{Time the job was submitted to the batch system as \code{\link[base]{POSIXct}}.} \item{started}{Time the job was started on the batch system as \code{\link[base]{POSIXct}}.} \item{done}{Time the job terminated (successfully or with an error) as \code{\link[base]{POSIXct}}.} \item{error}{Either \code{NA} if the job terminated successfully or the error message.} \item{mem.used}{Estimate of the memory usage.} \item{batch.id}{Batch ID as reported by the scheduler.} \item{log.file}{Log file. If missing, defaults to \code{[job.hash].log}.} \item{job.hash}{Unique string identifying the job or chunk.} \item{time.queued}{Time in seconds (as \code{\link[base]{difftime}}) the job was queued.} \item{time.running}{Time in seconds (as \code{\link[base]{difftime}}) the job was running.} \item{pars}{List of parameters/arguments for this job.} \item{resources}{List of computational resources set for this job.} \item{tags}{Tags as joined string, delimited by \dQuote{,}.} \item{problem}{Only for \code{\link{ExperimentRegistry}}: the problem identifier.} \item{algorithm}{Only for \code{\link{ExperimentRegistry}}: the algorithm identifier.} } } \description{ \code{getJobStatus} returns the internal table which stores information about the computational status of jobs, \code{getJobPars} a table with the job parameters, \code{getJobResources} a table with the resources which were set to submit the jobs, and \code{getJobTags} the tags of the jobs (see \link{Tags}). \code{getJobTable} returns all these tables joined. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) f = function(x) if (x < 0) stop("x must be > 0") else sqrt(x) batchMap(f, x = c(-1, 0, 1), reg = tmp) submitJobs(reg = tmp) waitForJobs(reg = tmp) addJobTags(1:2, "tag1", reg = tmp) addJobTags(2, "tag2", reg = tmp) # Complete table: getJobTable(reg = tmp) # Job parameters: getJobPars(reg = tmp) # Set and retrieve tags: getJobTags(reg = tmp) # Job parameters with tags right-joined: rjoin(getJobPars(reg = tmp), getJobTags(reg = tmp)) } ================================================ FILE: man/getStatus.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/getStatus.R \name{getStatus} \alias{getStatus} \title{Summarize the Computational Status} \usage{ getStatus(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] (with class \dQuote{Status} for printing). } \description{ This function gives an encompassing overview over the computational status on your system. The status can be one or many of the following: \itemize{ \item \dQuote{defined}: Jobs which are defined via \code{\link{batchMap}} or \code{\link{addExperiments}}, but are not yet submitted. \item \dQuote{submitted}: Jobs which are submitted to the batch system via \code{\link{submitJobs}}, scheduled for execution. \item \dQuote{started}: Jobs which have been started. \item \dQuote{done}: Jobs which terminated successfully. 
\item \dQuote{error}: Jobs which terminated with an exception. \item \dQuote{running}: Jobs which are listed by the cluster functions to be running on the live system. Not supported for all cluster functions. \item \dQuote{queued}: Jobs which are listed by the cluster functions to be queued on the live system. Not supported for all cluster functions. \item \dQuote{system}: Jobs which are listed by the cluster functions to be queued or running. Not supported for all cluster functions. \item \dQuote{expired}: Jobs which have been submitted, but vanished from the live system. Note that this is determined heuristically and may include some false positives. } Here, a job which terminated successfully counts towards the jobs which are submitted, started and done. To retrieve the corresponding job ids, see \code{\link{findJobs}}. } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) fun = function(i) if (i == 3) stop(i) else i ids = batchMap(fun, i = 1:5, reg = tmp) submitJobs(ids = 1:4, reg = tmp) waitForJobs(reg = tmp) tab = getStatus(reg = tmp) print(tab) str(tab) } \seealso{ \code{\link{findJobs}} Other debug: \code{\link{getErrorMessages}()}, \code{\link{grepLogs}()}, \code{\link{killJobs}()}, \code{\link{resetJobs}()}, \code{\link{showLog}()}, \code{\link{testJob}()} } \concept{debug} ================================================ FILE: man/grepLogs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Logs.R \name{grepLogs} \alias{grepLogs} \title{Grep Log Files for a Pattern} \usage{ grepLogs( ids = NULL, pattern, ignore.case = FALSE, fixed = FALSE, reg = getDefaultRegistry() ) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. 
Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findStarted}}. Invalid ids are ignored.} \item{pattern}{[\code{character(1L)}]\cr Regular expression or string (see \code{fixed}).} \item{ignore.case}{[\code{logical(1L)}]\cr If \code{TRUE} the match will be performed case insensitively.} \item{fixed}{[\code{logical(1L)}]\cr If \code{FALSE} (default), \code{pattern} is a regular expression and a fixed string otherwise.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id} and \dQuote{message}. } \description{ Crawls through log files and reports jobs with lines matching the \code{pattern}. See \code{\link{showLog}} for an example. } \seealso{ Other debug: \code{\link{getErrorMessages}()}, \code{\link{getStatus}()}, \code{\link{killJobs}()}, \code{\link{resetJobs}()}, \code{\link{showLog}()}, \code{\link{testJob}()} } \concept{debug} ================================================ FILE: man/killJobs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/killJobs.R \name{killJobs} \alias{killJobs} \title{Kill Jobs} \usage{ killJobs(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findOnSystem}}. Invalid ids are ignored.} \item{reg}{[\code{\link{Registry}}]\cr Registry. 
If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id}, the corresponding \dQuote{batch.id} and the logical flag \dQuote{killed} indicating success. } \description{ Kill jobs which are currently running on the batch system. In case of an error when killing, the function tries - after a short sleep - to kill the remaining batch jobs again. If this fails three times for some jobs, the function gives up. Jobs that could be successfully killed are reset in the \link{Registry}. } \seealso{ Other debug: \code{\link{getErrorMessages}()}, \code{\link{getStatus}()}, \code{\link{grepLogs}()}, \code{\link{resetJobs}()}, \code{\link{showLog}()}, \code{\link{testJob}()} } \concept{debug} ================================================ FILE: man/loadRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/loadRegistry.R \name{loadRegistry} \alias{loadRegistry} \title{Load a Registry from the File System} \usage{ loadRegistry( file.dir, work.dir = NULL, conf.file = findConfFile(), make.default = TRUE, writeable = FALSE ) } \arguments{ \item{file.dir}{[\code{character(1)}]\cr Path where all files of the registry are saved. Default is directory \dQuote{registry} in the current working directory. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well. If you pass \code{NA}, a temporary directory will be used. This way, you can create disposable registries for \code{\link{btlapply}} or examples. By default, the temporary directory \code{\link[base]{tempdir}()} will be used. If you want to use another directory, e.g. 
a directory which is shared between nodes, you can set it in your configuration file by setting the variable \code{temp.dir}.} \item{work.dir}{[\code{character(1)}]\cr Working directory for R process for running jobs. Defaults to the working directory currently set during Registry construction (see \code{\link[base]{getwd}}). \code{loadRegistry} uses the stored \code{work.dir}, but you may also explicitly overwrite it, e.g., after switching to another system. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well.} \item{conf.file}{[\code{character(1)}]\cr Path to a configuration file which is sourced while the registry is created. In the configuration file you can define how \pkg{batchtools} interacts with the system via \code{\link{ClusterFunctions}}. Separating the configuration of the underlying host system from the R code allows to easily move computation to another site. The file lookup is implemented in the internal (but exported) function \code{findConfFile} which returns the first file found of the following candidates: \enumerate{ \item{File \dQuote{batchtools.conf.R} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}.} \item{File \dQuote{batchtools.conf.R} in the current working directory.} \item{File \dQuote{config.R} in the user configuration directory as reported by \code{rappdirs::user_config_dir("batchtools", expand = FALSE)} (depending on OS, e.g., on linux this usually resolves to \dQuote{~/.config/batchtools/config.R}).} \item{\dQuote{.batchtools.conf.R} in the home directory (\dQuote{~}).} \item{\dQuote{config.R} in the site config directory as reported by \code{rappdirs::site_config_dir("batchtools")} (depending on OS). This file can be used for admins to set sane defaults for a computation site.} } Set to \code{NA} if you want to suppress reading any configuration file. 
If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. Therefore you can set and overwrite slots, e.g. \code{default.resources = list(walltime = 3600)} to set default resources or \dQuote{max.concurrent.jobs} to limit the number of jobs allowed to run simultaneously on the system.} \item{make.default}{[\code{logical(1)}]\cr If set to \code{TRUE}, the created registry is saved inside the package namespace and acts as default registry. You might want to switch this off if you work with multiple registries simultaneously. Default is \code{TRUE}.} \item{writeable}{[\code{logical(1)}]\cr Loads the registry in read-write mode. Default is \code{FALSE}.} } \value{ [\code{\link{Registry}}]. } \description{ Loads a registry from its \code{file.dir}. Multiple R sessions accessing the same registry simultaneously can lead to database inconsistencies. This is especially dangerous if the same \code{file.dir} is accessed from multiple machines, e.g. via a mount. If you just need to check on the status or peek into some preliminary results while another process is still submitting or waiting for pending results, you can load the registry in a read-only mode. All operations that need to change the registry will raise an exception in this mode. Files communicated back by the computational nodes are parsed to update the registry in memory while the registry on the file system remains unchanged. A heuristic tries to detect if the registry has been altered in the background by another process and in this case automatically restricts the current registry to read-only mode. However, you should not rely on this heuristic to work flawlessly. Thus, set \code{writeable} to \code{TRUE} if and only if you are absolutely sure that other state-changing processes are terminated. If you need write access, load the registry with \code{writeable} set to \code{TRUE}.
} \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{getDefaultRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{sweepRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/loadResult.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/loadResult.R \name{loadResult} \alias{loadResult} \title{Load the Result of a Single Job} \usage{ loadResult(id, reg = getDefaultRegistry()) } \arguments{ \item{id}{[\code{integer(1)} or \code{data.table}]\cr Single integer to specify the job or a \code{data.table} with column \code{job.id} and exactly one row.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{ANY}]. The stored result. } \description{ Loads the result of a single job. } \seealso{ Other Results: \code{\link{batchMapResults}()}, \code{\link{reduceResults}()}, \code{\link{reduceResultsList}()} } \concept{Results} ================================================ FILE: man/makeClusterFunctions.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{makeClusterFunctions} \alias{makeClusterFunctions} \alias{ClusterFunctions} \title{ClusterFunctions Constructor} \usage{ makeClusterFunctions( name, submitJob, killJob = NULL, listJobsQueued = NULL, listJobsRunning = NULL, array.var = NA_character_, store.job.collection = FALSE, store.job.files = FALSE, scheduler.latency = 0, fs.latency = 0, hooks = list() ) } \arguments{ \item{name}{[\code{character(1)}]\cr Name of cluster functions.} \item{submitJob}{[\code{function(reg, jc, ...)}]\cr Function to submit new jobs. Must return a \code{\link{SubmitJobResult}} object. 
The arguments are \code{reg} (\code{\link{Registry}}) and \code{jc} (\code{\link{JobCollection}}).} \item{killJob}{[\code{function(reg, batch.id)}]\cr Function to kill a job on the batch system. Make sure that you definitely kill the job! Return value is currently ignored. Must have the arguments \code{reg} (\code{\link{Registry}}) and \code{batch.id} (\code{character(1)} as returned by \code{submitJob}). Note that there is a helper function \code{\link{cfKillJob}} to repeatedly try to kill jobs. Set \code{killJob} to \code{NULL} if killing jobs cannot be supported.} \item{listJobsQueued}{[\code{function(reg)}]\cr List all queued jobs on the batch system for the current user. Must return a character vector of batch ids, same format as they are returned by \code{submitJob}. Set \code{listJobsQueued} to \code{NULL} if listing of queued jobs is not supported.} \item{listJobsRunning}{[\code{function(reg)}]\cr List all running jobs on the batch system for the current user. Must return a character vector of batch ids, same format as they are returned by \code{submitJob}. It does not matter if you return a few job ids too many (e.g. all for the current user instead of all for the current registry), but you have to include all relevant ones. Must have the argument \code{reg} (\code{\link{Registry}}). Set \code{listJobsRunning} to \code{NULL} if listing of running jobs is not supported.} \item{array.var}{[\code{character(1)}]\cr Name of the environment variable set by the scheduler to identify IDs of job arrays. Default is \code{NA} for no array support.} \item{store.job.collection}{[\code{logical(1)}]\cr Flag to indicate that the cluster function implementation of \code{submitJob} can not directly handle \code{\link{JobCollection}} objects.
If set to \code{TRUE}, the \code{\link{JobCollection}} is serialized to the file system before submitting the job.} \item{store.job.files}{[\code{logical(1)}]\cr Flag to indicate that job files need to be stored in the file directory. If set to \code{FALSE} (default), the job file is created in a temporary directory, otherwise (or if the debug mode is enabled) in the subdirectory \code{jobs} of the \code{file.dir}.} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} \item{hooks}{[\code{list}]\cr Named list of functions which will be called on certain events like \dQuote{pre.submit} or \dQuote{post.sync}. See \link{Hooks}.} } \description{ This is the constructor used to create \emph{custom} cluster functions. Note that some standard implementations for TORQUE, Slurm, LSF, SGE, etc. ship with the package.
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} Other ClusterFunctionsHelper: \code{\link{cfBrewTemplate}()}, \code{\link{cfHandleUnknownSubmitError}()}, \code{\link{cfKillJob}()}, \code{\link{cfReadBrewTemplate}()}, \code{\link{makeSubmitJobResult}()}, \code{\link{runOSCommand}()} } \concept{ClusterFunctions} \concept{ClusterFunctionsHelper} ================================================ FILE: man/makeClusterFunctionsDocker.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsDocker.R \name{makeClusterFunctionsDocker} \alias{makeClusterFunctionsDocker} \title{ClusterFunctions for Docker} \usage{ makeClusterFunctionsDocker( image, docker.args = character(0L), image.args = character(0L), scheduler.latency = 1, fs.latency = 65 ) } \arguments{ \item{image}{[\code{character(1)}]\cr Name of the docker image to run.} \item{docker.args}{[\code{character}]\cr Additional arguments passed to \dQuote{docker} *before* the command (\dQuote{run}, \dQuote{ps} or \dQuote{kill}) to execute (e.g., the docker host).} \item{image.args}{[\code{character}]\cr Additional arguments passed to \dQuote{docker run} (e.g., to define mounts or environment variables).} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. 
Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Cluster functions for Docker/Docker Swarm (\url{https://docs.docker.com/engine/swarm/}). The \code{submitJob} function executes \code{docker [docker.args] run --detach=true [image.args] [resources] [image] [cmd]}. Arguments \code{docker.args}, \code{image.args} and \code{image} can be set on construction. The \code{resources} part takes the named resources \code{ncpus} and \code{memory} from \code{\link{submitJobs}} and maps them to the arguments \code{--cpu-shares} and \code{--memory} (in Megabytes). The resource \code{threads} is mapped to the environment variables \dQuote{OMP_NUM_THREADS} and \dQuote{OPENBLAS_NUM_THREADS}. To reliably identify jobs in the swarm, jobs are labeled with \dQuote{batchtools=[job.hash]} and named using the current login name (label \dQuote{user}) and the job hash (label \dQuote{batchtools}). \code{listJobsRunning} uses \code{docker [docker.args] ps --format=\{\{.ID\}\}} to filter for running jobs. \code{killJobs} uses \code{docker [docker.args] kill [batch.id]} to kill running jobs. These cluster functions use a \link{Hook} to remove finished jobs before a new submit and every time the \link{Registry} is synchronized (using \code{\link{syncRegistry}}). This is currently required because docker does not remove terminated containers automatically. Use \code{docker ps -a --filter 'label=batchtools' --filter 'status=exited'} to identify and remove terminated containers manually (or use a cron job).
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsHyperQueue.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsHyperQueue.R \name{makeClusterFunctionsHyperQueue} \alias{makeClusterFunctionsHyperQueue} \title{ClusterFunctions for HyperQueue} \usage{ makeClusterFunctionsHyperQueue(scheduler.latency = 1, fs.latency = 65) } \arguments{ \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [ClusterFunctions]. } \description{ Cluster functions for HyperQueue (\url{https://it4innovations.github.io/hyperqueue/stable/}). Jobs are submitted via the HyperQueue CLI using \code{hq submit} and executed by calling \code{Rscript -e "batchtools::doJobCollection(...)"}. The job name is set to the job hash and logs are handled internally by batchtools. 
Listing jobs uses \code{hq job list} and cancelling jobs uses \code{hq job cancel}. A running HyperQueue server and workers are required. } \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsInteractive.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsInteractive.R \name{makeClusterFunctionsInteractive} \alias{makeClusterFunctionsInteractive} \title{ClusterFunctions for Sequential Execution in the Running R Session} \usage{ makeClusterFunctionsInteractive( external = FALSE, write.logs = TRUE, fs.latency = 0 ) } \arguments{ \item{external}{[\code{logical(1)}]\cr If set to \code{TRUE}, jobs are started in a fresh R session instead of the currently active one, but the call still waits for their termination. Default is \code{FALSE}.} \item{write.logs}{[\code{logical(1)}]\cr Sink the output to log files. Turning logging off can increase the speed of calculations but makes it very difficult to debug. Default is \code{TRUE}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}].
} \description{ All jobs are executed sequentially using the current R process in which \code{\link{submitJobs}} is called. Thus, \code{submitJob} blocks the session until the job has finished. The main use of this \code{ClusterFunctions} implementation is to test and debug programs on a local computer. Listing jobs returns an empty vector (as no jobs can be running when you call this) and \code{killJob} is not implemented for the same reasons. } \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsLSF.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsLSF.R \name{makeClusterFunctionsLSF} \alias{makeClusterFunctionsLSF} \title{ClusterFunctions for LSF Systems} \usage{ makeClusterFunctionsLSF( template = "lsf", scheduler.latency = 1, fs.latency = 65 ) } \arguments{ \item{template}{[\code{character(1)}]\cr Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: \enumerate{ \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. \item \dQuote{batchtools.[template].tmpl} in the current working directory. 
\item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. \item \dQuote{.batchtools.[template].tmpl} in the home directory. \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}. }} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Cluster functions for LSF (\url{https://www.ibm.com/products/hpc-workload-management}). Job files are created based on the brew template \code{template}. This file is processed with brew and then submitted to the queue using the \code{bsub} command. Jobs are killed using the \code{bkill} command and the list of running jobs is retrieved using \code{bjobs -u $USER -w}. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case). The template file can access all resources passed to \code{\link{submitJobs}} as well as all variables stored in the \code{\link{JobCollection}}. It is the template file's job to choose a queue for the job and handle the desired resource allocations. } \note{ Array jobs are currently not supported.
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsMulticore.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsMulticore.R \name{makeClusterFunctionsMulticore} \alias{makeClusterFunctionsMulticore} \title{ClusterFunctions for Parallel Multicore Execution} \usage{ makeClusterFunctionsMulticore(ncpus = NA_integer_, fs.latency = 0) } \arguments{ \item{ncpus}{[\code{integer(1)}]\cr Number of CPUs. Default is to use all logical cores. The total number of cores "available" can be set via the option \code{mc.cores} and defaults to the heuristic implemented in \code{\link[parallel]{detectCores}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Jobs are spawned asynchronously using the functions \code{mcparallel} and \code{mccollect} (both in \pkg{parallel}). Does not work on Windows, use \code{\link{makeClusterFunctionsSocket}} instead. 
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsOpenLava.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsOpenLava.R \name{makeClusterFunctionsOpenLava} \alias{makeClusterFunctionsOpenLava} \title{ClusterFunctions for OpenLava} \usage{ makeClusterFunctionsOpenLava( template = "openlava", scheduler.latency = 1, fs.latency = 65 ) } \arguments{ \item{template}{[\code{character(1)}]\cr Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: \enumerate{ \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. \item \dQuote{batchtools.[template].tmpl} in the current working directory. \item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. \item \dQuote{.batchtools.[template].tmpl} in the home directory. \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}. }} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. 
Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Cluster functions for OpenLava. Job files are created based on the brew template \code{template}. This file is processed with brew and then submitted to the queue using the \code{bsub} command. Jobs are killed using the \code{bkill} command and the list of running jobs is retrieved using \code{bjobs -u $USER -w}. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case). The template file can access all resources passed to \code{\link{submitJobs}} as well as all variables stored in the \code{\link{JobCollection}}. It is the template file's job to choose a queue for the job and handle the desired resource allocations. } \note{ Array jobs are currently not supported. 
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsSGE.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsSGE.R \name{makeClusterFunctionsSGE} \alias{makeClusterFunctionsSGE} \title{ClusterFunctions for SGE Systems} \usage{ makeClusterFunctionsSGE( template = "sge", nodename = "localhost", scheduler.latency = 1, fs.latency = 65 ) } \arguments{ \item{template}{[\code{character(1)}]\cr Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: \enumerate{ \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. \item \dQuote{batchtools.[template].tmpl} in the current working directory. \item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. \item \dQuote{.batchtools.[template].tmpl} in the home directory. \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}. }} \item{nodename}{[\code{character(1)}]\cr Nodename of the master host. All commands are sent via SSH to this host.
Only works iff \enumerate{ \item{Passwordless authentication (e.g., via SSH public key authentication) is set up.} \item{The file directory is shared across machines, e.g. mounted via SSHFS.} \item{Either the absolute path to the \code{file.dir} is identical on the machines, or paths are provided relative to the home directory. Symbolic links should work.} }} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Cluster functions for Univa Grid Engine / Oracle Grid Engine / Sun Grid Engine (\url{https://altair.com/hpc-cloud-applications/}). Job files are created based on the brew template \code{template}. This file is processed with brew and then submitted to the queue using the \code{qsub} command. Jobs are killed using the \code{qdel} command and the list of running jobs is retrieved using \code{qselect}. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case). The template file can access all resources passed to \code{\link{submitJobs}} as well as all variables stored in the \code{\link{JobCollection}}. It is the template file's job to choose a queue for the job and handle the desired resource allocations. } \note{ Array jobs are currently not supported. 
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsSSH.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsSSH.R \name{makeClusterFunctionsSSH} \alias{makeClusterFunctionsSSH} \title{ClusterFunctions for Remote SSH Execution} \usage{ makeClusterFunctionsSSH(workers, fs.latency = 65) } \arguments{ \item{workers}{[\code{list} of \code{\link{Worker}}]\cr List of Workers as constructed with \code{\link{Worker}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Jobs are spawned by starting multiple R sessions via \code{Rscript} over SSH. If the hostname of the \code{\link{Worker}} equals \dQuote{localhost}, \code{Rscript} is called directly so that you do not need to have an SSH client installed. } \note{ If you use a custom \dQuote{.ssh/config} file, make sure your ProxyCommand passes \sQuote{-q} to ssh, otherwise each output will end with the message \dQuote{Killed by signal 1} and this will break the communication with the nodes. 
} \examples{ \dontrun{ # cluster functions for multicore execution on the local machine makeClusterFunctionsSSH(list(Worker$new("localhost", ncpus = 2))) } } \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsSlurm.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsSlurm.R \name{makeClusterFunctionsSlurm} \alias{makeClusterFunctionsSlurm} \title{ClusterFunctions for Slurm Systems} \usage{ makeClusterFunctionsSlurm( template = "slurm", array.jobs = TRUE, nodename = "localhost", scheduler.latency = 1, fs.latency = 65 ) } \arguments{ \item{template}{[\code{character(1)}]\cr Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: \enumerate{ \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. \item \dQuote{batchtools.[template].tmpl} in the current working directory. \item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. \item \dQuote{.batchtools.[template].tmpl} in the home directory. \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}. 
}} \item{array.jobs}{[\code{logical(1)}]\cr If array jobs are disabled on the computing site, set to \code{FALSE}.} \item{nodename}{[\code{character(1)}]\cr Nodename of the master host. All commands are sent via SSH to this host. Only works iff \enumerate{ \item{Passwordless authentication (e.g., via SSH public key authentication) is set up.} \item{The file directory is shared across machines, e.g. mounted via SSHFS.} \item{Either the absolute path to the \code{file.dir} is identical on the machines, or paths are provided relative to the home directory. Symbolic links should work.} }} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Cluster functions for Slurm (\url{https://slurm.schedmd.com/}). Job files are created based on the brew template \code{template}. This file is processed with brew and then submitted to the queue using the \code{sbatch} command. Jobs are killed using the \code{scancel} command and the list of running jobs is retrieved using \code{squeue}. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case). The template file can access all resources passed to \code{\link{submitJobs}} as well as all variables stored in the \code{\link{JobCollection}}. It is the template file's job to choose a queue for the job and handle the desired resource allocations.
Note that you might have to specify the cluster name here if you do not want to use the default, otherwise the commands for listing and killing jobs will not work. } \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSocket}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsSocket.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsSocket.R \name{makeClusterFunctionsSocket} \alias{makeClusterFunctionsSocket} \title{ClusterFunctions for Parallel Socket Execution} \usage{ makeClusterFunctionsSocket(ncpus = NA_integer_, fs.latency = 65) } \arguments{ \item{ncpus}{[\code{integer(1)}]\cr Number of CPUs. Default is to use all logical cores. The total number of cores "available" can be set via the option \code{mc.cores} and defaults to the heuristic implemented in \code{\link[parallel]{detectCores}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Jobs are spawned asynchronously using the package \pkg{snow}. 
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsTORQUE}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeClusterFunctionsTORQUE.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctionsTORQUE.R \name{makeClusterFunctionsTORQUE} \alias{makeClusterFunctionsTORQUE} \title{ClusterFunctions for OpenPBS/TORQUE Systems} \usage{ makeClusterFunctionsTORQUE( template = "torque", scheduler.latency = 1, fs.latency = 65 ) } \arguments{ \item{template}{[\code{character(1)}]\cr Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: \enumerate{ \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. \item \dQuote{batchtools.[template].tmpl} in the current working directory. \item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. \item \dQuote{.batchtools.[template].tmpl} in the home directory. \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}. }} \item{scheduler.latency}{[\code{numeric(1)}]\cr Time to sleep after important interactions with the scheduler to ensure a sane state. 
Currently only triggered after calling \code{\link{submitJobs}}.} \item{fs.latency}{[\code{numeric(1)}]\cr Expected maximum latency of the file system, in seconds. Set to a positive number for network file systems like NFS which enables more robust (but also more expensive) mechanisms to access files and directories. Usually safe to set to \code{0} to disable the heuristic, e.g. if you are working on a local file system.} } \value{ [\code{\link{ClusterFunctions}}]. } \description{ Cluster functions for TORQUE/PBS (\url{https://adaptivecomputing.com/cherry-services/torque-resource-manager/}). Job files are created based on the brew template \code{template}. This file is processed with brew and then submitted to the queue using the \code{qsub} command. Jobs are killed using the \code{qdel} command and the list of running jobs is retrieved using \code{qselect}. The user must have the appropriate privileges to submit, delete and list jobs on the cluster (this is usually the case). The template file can access all resources passed to \code{\link{submitJobs}} as well as all variables stored in the \code{\link{JobCollection}}. It is the template file's job to choose a queue for the job and handle the desired resource allocations.
} \seealso{ Other ClusterFunctions: \code{\link{makeClusterFunctions}()}, \code{\link{makeClusterFunctionsDocker}()}, \code{\link{makeClusterFunctionsHyperQueue}()}, \code{\link{makeClusterFunctionsInteractive}()}, \code{\link{makeClusterFunctionsLSF}()}, \code{\link{makeClusterFunctionsMulticore}()}, \code{\link{makeClusterFunctionsOpenLava}()}, \code{\link{makeClusterFunctionsSGE}()}, \code{\link{makeClusterFunctionsSSH}()}, \code{\link{makeClusterFunctionsSlurm}()}, \code{\link{makeClusterFunctionsSocket}()} } \concept{ClusterFunctions} ================================================ FILE: man/makeExperimentRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ExperimentRegistry.R \name{makeExperimentRegistry} \alias{makeExperimentRegistry} \alias{ExperimentRegistry} \title{ExperimentRegistry Constructor} \usage{ makeExperimentRegistry( file.dir = "registry", work.dir = getwd(), conf.file = findConfFile(), packages = character(0L), namespaces = character(0L), source = character(0L), load = character(0L), seed = NULL, make.default = TRUE ) } \arguments{ \item{file.dir}{[\code{character(1)}]\cr Path where all files of the registry are saved. Default is directory \dQuote{registry} in the current working directory. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well. If you pass \code{NA}, a temporary directory will be used. This way, you can create disposable registries for \code{\link{btlapply}} or examples. By default, the temporary directory \code{\link[base]{tempdir}()} will be used. If you want to use another directory, e.g. a directory which is shared between nodes, you can set it in your configuration file by setting the variable \code{temp.dir}.} \item{work.dir}{[\code{character(1)}]\cr Working directory for R process for running jobs. 
Defaults to the working directory currently set during Registry construction (see \code{\link[base]{getwd}}). \code{loadRegistry} uses the stored \code{work.dir}, but you may also explicitly overwrite it, e.g., after switching to another system. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well.} \item{conf.file}{[\code{character(1)}]\cr Path to a configuration file which is sourced while the registry is created. In the configuration file you can define how \pkg{batchtools} interacts with the system via \code{\link{ClusterFunctions}}. Separating the configuration of the underlying host system from the R code allows to easily move computation to another site. The file lookup is implemented in the internal (but exported) function \code{findConfFile} which returns the first file found of the following candidates: \enumerate{ \item{File \dQuote{batchtools.conf.R} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}.} \item{File \dQuote{batchtools.conf.R} in the current working directory.} \item{File \dQuote{config.R} in the user configuration directory as reported by \code{rappdirs::user_config_dir("batchtools", expand = FALSE)} (depending on OS, e.g., on linux this usually resolves to \dQuote{~/.config/batchtools/config.R}).} \item{\dQuote{.batchtools.conf.R} in the home directory (\dQuote{~}).} \item{\dQuote{config.R} in the site config directory as reported by \code{rappdirs::site_config_dir("batchtools")} (depending on OS). This file can be used for admins to set sane defaults for a computation site.} } Set to \code{NA} if you want to suppress reading any configuration file. If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. Therefore you can set and overwrite slots, e.g. 
\code{default.resources = list(walltime = 3600)} to set default resources or \dQuote{max.concurrent.jobs} to limit the number of jobs allowed to run simultaneously on the system.} \item{packages}{[\code{character}]\cr Packages that will always be loaded on each node. Uses \code{\link[base]{require}} internally. Default is \code{character(0)}.} \item{namespaces}{[\code{character}]\cr Same as \code{packages}, but the packages will not be attached. Uses \code{\link[base]{requireNamespace}} internally. Default is \code{character(0)}.} \item{source}{[\code{character}]\cr Files which should be sourced on the slaves prior to executing a job. Calls \code{\link[base]{sys.source}} using the \code{\link[base]{.GlobalEnv}}.} \item{load}{[\code{character}]\cr Files which should be loaded on the slaves prior to executing a job. Calls \code{\link[base]{load}} using the \code{\link[base]{.GlobalEnv}}.} \item{seed}{[\code{integer(1)}]\cr Start seed for jobs. Each job uses the (\code{seed} + \code{job.id}) as seed. Default is a random integer between 1 and 32768. Note that there is an additional seeding mechanism to synchronize instantiation of \code{\link{Problem}}s in a \code{\link{ExperimentRegistry}}.} \item{make.default}{[\code{logical(1)}]\cr If set to \code{TRUE}, the created registry is saved inside the package namespace and acts as default registry. You might want to switch this off if you work with multiple registries simultaneously. Default is \code{TRUE}.} } \value{ [\code{ExperimentRegistry}]. } \description{ \code{makeExperimentRegistry} constructs a special \code{\link{Registry}} which is suitable for the definition of large scale computer experiments. Each experiment consists of a \code{\link{Problem}} and an \code{\link{Algorithm}}. These can be parametrized with \code{\link{addExperiments}} to actually define computational jobs.
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) # Define one problem, two algorithms and add them with some parameters: addProblem(reg = tmp, "p1", fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd)) addAlgorithm(reg = tmp, "a1", fun = function(job, data, instance, ...) mean(instance)) addAlgorithm(reg = tmp, "a2", fun = function(job, data, instance, ...) median(instance)) ids = addExperiments(reg = tmp, list(p1 = data.table::CJ(n = c(50, 100), mean = -2:2, sd = 1:4))) # Overview over defined experiments: tmp$problems tmp$algorithms summarizeExperiments(reg = tmp) summarizeExperiments(reg = tmp, by = c("problem", "algorithm", "n")) ids = findExperiments(prob.pars = (n == 50), reg = tmp) print(unwrap(getJobPars(ids, reg = tmp))) # Submit jobs submitJobs(reg = tmp) waitForJobs(reg = tmp) # Reduce the results of algorithm a1 ids.mean = findExperiments(algo.name = "a1", reg = tmp) reduceResults(ids.mean, fun = function(aggr, res, ...)
c(aggr, res), reg = tmp) # Join info table with all results and calculate mean of results # grouped by n and algorithm ids = findDone(reg = tmp) pars = unwrap(getJobPars(ids, reg = tmp)) results = unwrap(reduceResultsDataTable(ids, fun = function(res) list(res = res), reg = tmp)) tab = ljoin(pars, results) tab[, list(mres = mean(res)), by = c("n", "algorithm")] } \concept{Registry Experiment} ================================================ FILE: man/makeRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Registry.R \name{makeRegistry} \alias{makeRegistry} \alias{Registry} \title{Registry Constructor} \usage{ makeRegistry( file.dir = "registry", work.dir = getwd(), conf.file = findConfFile(), packages = character(0L), namespaces = character(0L), source = character(0L), load = character(0L), seed = NULL, make.default = TRUE ) } \arguments{ \item{file.dir}{[\code{character(1)}]\cr Path where all files of the registry are saved. Default is directory \dQuote{registry} in the current working directory. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well. If you pass \code{NA}, a temporary directory will be used. This way, you can create disposable registries for \code{\link{btlapply}} or examples. By default, the temporary directory \code{\link[base]{tempdir}()} will be used. If you want to use another directory, e.g. a directory which is shared between nodes, you can set it in your configuration file by setting the variable \code{temp.dir}.} \item{work.dir}{[\code{character(1)}]\cr Working directory for R process for running jobs. Defaults to the working directory currently set during Registry construction (see \code{\link[base]{getwd}}). 
\code{loadRegistry} uses the stored \code{work.dir}, but you may also explicitly overwrite it, e.g., after switching to another system. The provided path will get normalized unless it is given relative to the home directory (i.e., starting with \dQuote{~}). Note that some templates do not handle relative paths well.} \item{conf.file}{[\code{character(1)}]\cr Path to a configuration file which is sourced while the registry is created. In the configuration file you can define how \pkg{batchtools} interacts with the system via \code{\link{ClusterFunctions}}. Separating the configuration of the underlying host system from the R code allows to easily move computation to another site. The file lookup is implemented in the internal (but exported) function \code{findConfFile} which returns the first file found of the following candidates: \enumerate{ \item{File \dQuote{batchtools.conf.R} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}.} \item{File \dQuote{batchtools.conf.R} in the current working directory.} \item{File \dQuote{config.R} in the user configuration directory as reported by \code{rappdirs::user_config_dir("batchtools", expand = FALSE)} (depending on OS, e.g., on linux this usually resolves to \dQuote{~/.config/batchtools/config.R}).} \item{\dQuote{.batchtools.conf.R} in the home directory (\dQuote{~}).} \item{\dQuote{config.R} in the site config directory as reported by \code{rappdirs::site_config_dir("batchtools")} (depending on OS). This file can be used for admins to set sane defaults for a computation site.} } Set to \code{NA} if you want to suppress reading any configuration file. If a configuration file is found, it gets sourced inside the environment of the registry after the defaults for all variables are set. Therefore you can set and overwrite slots, e.g. 
\code{default.resources = list(walltime = 3600)} to set default resources or \dQuote{max.concurrent.jobs} to limit the number of jobs allowed to run simultaneously on the system.} \item{packages}{[\code{character}]\cr Packages that will always be loaded on each node. Uses \code{\link[base]{require}} internally. Default is \code{character(0)}.} \item{namespaces}{[\code{character}]\cr Same as \code{packages}, but the packages will not be attached. Uses \code{\link[base]{requireNamespace}} internally. Default is \code{character(0)}.} \item{source}{[\code{character}]\cr Files which should be sourced on the slaves prior to executing a job. Calls \code{\link[base]{sys.source}} using the \code{\link[base]{.GlobalEnv}}.} \item{load}{[\code{character}]\cr Files which should be loaded on the slaves prior to executing a job. Calls \code{\link[base]{load}} using the \code{\link[base]{.GlobalEnv}}.} \item{seed}{[\code{integer(1)}]\cr Start seed for jobs. Each job uses the (\code{seed} + \code{job.id}) as seed. Default is a random integer between 1 and 32768. Note that there is an additional seeding mechanism to synchronize instantiation of \code{\link{Problem}}s in a \code{\link{ExperimentRegistry}}.} \item{make.default}{[\code{logical(1)}]\cr If set to \code{TRUE}, the created registry is saved inside the package namespace and acts as default registry. You might want to switch this off if you work with multiple registries simultaneously. Default is \code{TRUE}.} } \value{ [\code{environment}] of class \dQuote{Registry} with the following slots: \describe{ \item{\code{file.dir} [path]:}{File directory.} \item{\code{work.dir} [path]:}{Working directory.} \item{\code{temp.dir} [path]:}{Temporary directory. Used if \code{file.dir} is \code{NA} to create temporary registries.} \item{\code{packages} [character()]:}{Packages to load on the slaves.} \item{\code{namespaces} [character()]:}{Namespaces to load on the slaves.} \item{\code{seed} [integer(1)]:}{Registry seed. 
Before each job is executed, the seed \code{seed + job.id} is set.} \item{\code{cluster.functions} [cluster.functions]:}{Usually set in your \code{conf.file}. Set via a call to \code{\link{makeClusterFunctions}}. See example.} \item{\code{default.resources} [named list()]:}{Usually set in your \code{conf.file}. Named list of default resources.} \item{\code{max.concurrent.jobs} [integer(1)]:}{Usually set in your \code{conf.file}. Maximum number of concurrent jobs for a single user and current registry on the system. \code{\link{submitJobs}} will try to respect this setting. The resource \dQuote{max.concurrent.jobs} has higher precedence.} \item{\code{defs} [data.table]:}{Table with job definitions (i.e. parameters).} \item{\code{status} [data.table]:}{Table holding information about the computational status. Also see \code{\link{getJobStatus}}.} \item{\code{resources} [data.table]:}{Table holding information about the computational resources used for the job. Also see \code{\link{getJobResources}}.} \item{\code{tags} [data.table]:}{Table holding information about tags. See \link{Tags}.} \item{\code{hash} [character(1)]:}{Unique hash which changes each time the registry gets saved to the file system. Can be utilized to invalidate the cache of \pkg{knitr}.} } } \description{ \code{makeRegistry} constructs the inter-communication object for all functions in \code{batchtools}. All communication transactions are processed via the file system: All information required to run a job is stored as \code{\link{JobCollection}} in a file in a subdirectory of the \code{file.dir} directory. Each job stores its results as well as computational status information (start time, end time, error message, ...) also on the file system which is regularly parsed and merged by the master using \code{\link{syncRegistry}}. After integrating the new information into the Registry, the Registry is serialized to the file system via \code{\link{saveRegistry}}.
Both \code{\link{syncRegistry}} and \code{\link{saveRegistry}} are called whenever required internally. Therefore it should be safe to quit the R session at any time. Work can later be resumed by calling \code{\link{loadRegistry}} which de-serializes the registry from the file system. The registry created last is saved in the package namespace (unless \code{make.default} is set to \code{FALSE}) and can be retrieved via \code{\link{getDefaultRegistry}}. Canceled jobs and jobs submitted multiple times may leave stray files behind. These can be swept using \code{\link{sweepRegistry}}. \code{\link{clearRegistry}} completely erases all jobs from a registry, including log files and results, and thus allows you to start over. } \details{ Currently \pkg{batchtools} understands the following options set via the configuration file: \describe{ \item{\code{cluster.functions}:}{As returned by a constructor, e.g. \code{\link{makeClusterFunctionsSlurm}}.} \item{\code{default.resources}:}{List of resources to use. Will be overruled by resources specified via \code{\link{submitJobs}}.} \item{\code{temp.dir}:}{Path to directory to use for temporary registries.} \item{\code{sleep}:}{Custom sleep function. 
See \code{\link{waitForJobs}}.} \item{\code{expire.after}:}{Number of iterations before treating jobs as expired in \code{\link{waitForJobs}}.} \item{\code{compress}:}{Compression algorithm to use via \code{\link{saveRDS}}.} } } \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) print(tmp) # Set cluster functions to interactive mode and start jobs in external R sessions tmp$cluster.functions = makeClusterFunctionsInteractive(external = TRUE) # Change packages to load tmp$packages = c("MASS") saveRegistry(reg = tmp) } \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{getDefaultRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{sweepRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/makeSubmitJobResult.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clusterFunctions.R \name{makeSubmitJobResult} \alias{makeSubmitJobResult} \alias{SubmitJobResult} \title{Create a SubmitJobResult} \usage{ makeSubmitJobResult( status, batch.id, log.file = NA_character_, msg = NA_character_ ) } \arguments{ \item{status}{[\code{integer(1)}]\cr Launch status of job. 0 means success, codes between 1 and 100 are temporary errors and any error greater than 100 is a permanent failure.} \item{batch.id}{[\code{character()}]\cr Unique id of this job on batch system, as given by the batch system. Must be globally unique so that the job can be terminated using just this information. For array jobs, this may be a vector of length equal to the number of jobs in the array.} \item{log.file}{[\code{character()}]\cr Log file. If \code{NA}, defaults to \code{[job.hash].log}. 
Some cluster functions set this for array jobs.} \item{msg}{[\code{character(1)}]\cr Optional error message in case \code{status} is not equal to 0. Default is \dQuote{OK}, \dQuote{TEMPERROR}, \dQuote{ERROR}, depending on \code{status}.} } \value{ [\code{\link{SubmitJobResult}}]. A list, containing \code{status}, \code{batch.id} and \code{msg}. } \description{ This function is only intended for use in your own cluster functions implementation. Use this function in your implementation of \code{\link{makeClusterFunctions}} to create a return value for the \code{submitJob} function. } \seealso{ Other ClusterFunctionsHelper: \code{\link{cfBrewTemplate}()}, \code{\link{cfHandleUnknownSubmitError}()}, \code{\link{cfKillJob}()}, \code{\link{cfReadBrewTemplate}()}, \code{\link{makeClusterFunctions}()}, \code{\link{runOSCommand}()} } \concept{ClusterFunctionsHelper} ================================================ FILE: man/reduceResults.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/reduceResults.R \name{reduceResults} \alias{reduceResults} \title{Reduce Results} \usage{ reduceResults(fun, ids = NULL, init, ..., reg = getDefaultRegistry()) } \arguments{ \item{fun}{[\code{function}]\cr A function to reduce the results. The result of previous iterations (or the \code{init}) will be passed as first argument, the result of the i-th iteration as second. See \code{\link[base]{Reduce}} for some examples. If the function has the formal argument \dQuote{job}, the \code{\link{Job}}/\code{\link{Experiment}} is also passed to the function (named).} \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findDone}}.
Invalid ids are ignored.} \item{init}{[\code{ANY}]\cr Initial element, as used in \code{\link[base]{Reduce}}. If missing, the reduction uses the result of the first job as \code{init} and the reduction starts with the second job.} \item{...}{[\code{ANY}]\cr Additional arguments passed to function \code{fun}.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ Aggregated results in the same order as provided ids. Return type depends on the user function. If \code{ids} is empty, \code{reduceResults} returns \code{init} (if available) or \code{NULL} otherwise. } \description{ A version of \code{\link[base]{Reduce}} for \code{\link{Registry}} objects which iterates over finished jobs and aggregates them. All jobs must have terminated, an error is raised otherwise. } \note{ If you have thousands of jobs, disabling the progress bar (\code{options(batchtools.progress = FALSE)}) can significantly increase the performance. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(function(a, b) list(sum = a+b, prod = a*b), a = 1:3, b = 1:3, reg = tmp) submitJobs(reg = tmp) waitForJobs(reg = tmp) # Extract element sum from each result reduceResults(function(aggr, res) c(aggr, res$sum), init = list(), reg = tmp) # Aggregate element sum via '+' reduceResults(function(aggr, res) aggr + res$sum, init = 0, reg = tmp) # Aggregate element prod via '*' where parameter b < 3 reduce = function(aggr, res, job) { if (job$pars$b >= 3) return(aggr) aggr * res$prod } reduceResults(reduce, init = 1, reg = tmp) # Reduce to data.frame() (inefficient, use reduceResultsDataTable() instead) reduceResults(rbind, init = data.frame(), reg = tmp) # Reduce to data.frame by collecting results first, then utilize vectorization of rbind: res = reduceResultsList(fun = as.data.frame, reg = tmp) do.call(rbind, res) # Reduce with custom combine function: comb = function(x, y) list(sum = x$sum + y$sum, prod = x$prod * y$prod) reduceResults(comb, reg = tmp) # The same with neutral element NULL comb = function(x, y) if (is.null(x)) y else list(sum = x$sum + y$sum, prod = x$prod * y$prod) reduceResults(comb, init = NULL, reg = tmp) # Alternative: Reduce in list, reduce manually in a 2nd step res = reduceResultsList(reg = tmp) Reduce(comb, res) } \seealso{ Other Results: \code{\link{batchMapResults}()}, \code{\link{loadResult}()}, \code{\link{reduceResultsList}()} } \concept{Results} ================================================ FILE: man/reduceResultsList.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/reduceResults.R \name{reduceResultsList} \alias{reduceResultsList} \alias{reduceResultsDataTable} \title{Apply Functions on Results} \usage{ reduceResultsList( ids = NULL, fun = NULL, ..., missing.val, reg = getDefaultRegistry() ) reduceResultsDataTable( ids = NULL, fun = 
NULL, ..., missing.val, reg = getDefaultRegistry() ) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findDone}}. Invalid ids are ignored.} \item{fun}{[\code{function}]\cr Function to apply to each result. The result is passed unnamed as first argument. If \code{NULL}, the identity is used. If the function has the formal argument \dQuote{job}, the \code{\link{Job}}/\code{\link{Experiment}} is also passed to the function.} \item{...}{[\code{ANY}]\cr Additional arguments passed to function \code{fun}.} \item{missing.val}{[\code{ANY}]\cr Value to impute as result for a job which is not finished. If not provided and a result is missing, an exception is raised.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ \code{reduceResultsList} returns a list of the results in the same order as the provided ids. \code{reduceResultsDataTable} returns a \code{\link[data.table]{data.table}} with columns \dQuote{job.id} and additional result columns created via \code{\link[data.table]{rbindlist}}, sorted by \dQuote{job.id}. } \description{ Applies a function on the results of your finished jobs and thereby collects them in a \code{\link[base]{list}} or \code{\link[data.table]{data.table}}. The latter requires the provided function to return a list (or \code{data.frame}) of scalar values. See \code{\link[data.table]{rbindlist}} for features and limitations of the aggregation. If not all jobs are terminated, the respective result will be \code{NULL}. } \note{ If you have thousands of jobs, disabling the progress bar (\code{options(batchtools.progress = FALSE)}) can significantly increase the performance.
} \examples{ \dontshow{ batchtools:::example_push_temp(2) } ### Example 1 - reduceResultsList tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(function(x) x^2, x = 1:10, reg = tmp) submitJobs(reg = tmp) waitForJobs(reg = tmp) reduceResultsList(fun = sqrt, reg = tmp) ### Example 2 - reduceResultsDataTable tmp = makeExperimentRegistry(file.dir = NA, make.default = FALSE) # add first problem fun = function(job, data, n, mean, sd, ...) rnorm(n, mean = mean, sd = sd) addProblem("rnorm", fun = fun, reg = tmp) # add second problem fun = function(job, data, n, lambda, ...) rexp(n, rate = lambda) addProblem("rexp", fun = fun, reg = tmp) # add first algorithm fun = function(instance, method, ...) if (method == "mean") mean(instance) else median(instance) addAlgorithm("average", fun = fun, reg = tmp) # add second algorithm fun = function(instance, ...) sd(instance) addAlgorithm("deviation", fun = fun, reg = tmp) # define problem and algorithm designs library(data.table) prob.designs = algo.designs = list() prob.designs$rnorm = CJ(n = 100, mean = -1:1, sd = 1:5) prob.designs$rexp = data.table(n = 100, lambda = 1:5) algo.designs$average = data.table(method = c("mean", "median")) algo.designs$deviation = data.table() # add experiments and submit addExperiments(prob.designs, algo.designs, reg = tmp) submitJobs(reg = tmp) # collect results and join them with problem and algorithm parameters res = ijoin( getJobPars(reg = tmp), reduceResultsDataTable(reg = tmp, fun = function(x) list(res = x)) ) unwrap(res, sep = ".") } \seealso{ \code{\link{reduceResults}} Other Results: \code{\link{batchMapResults}()}, \code{\link{loadResult}()}, \code{\link{reduceResults}()} } \concept{Results} ================================================ FILE: man/removeExperiments.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/removeExperiments.R \name{removeExperiments} \alias{removeExperiments} \title{Remove
Experiments} \usage{ removeExperiments(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to no job. Invalid ids are ignored.} \item{reg}{[\code{\link{ExperimentRegistry}}]\cr Registry. If not explicitly passed, uses the last created registry.} } \value{ [\code{\link[data.table]{data.table}}] of removed job ids, invisibly. } \description{ Remove Experiments from an \code{\link{ExperimentRegistry}}. This function automatically checks if any of the jobs to remove is either pending or running. However, if the implemented heuristic fails, this can lead to inconsistencies in the data base. Use with care while jobs are running. } \seealso{ Other Experiment: \code{\link{addExperiments}()}, \code{\link{summarizeExperiments}()} } \concept{Experiment} ================================================ FILE: man/removeRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/removeRegistry.R \name{removeRegistry} \alias{removeRegistry} \title{Remove a Registry from the File System} \usage{ removeRegistry(wait = 5, reg = getDefaultRegistry()) } \arguments{ \item{wait}{[\code{numeric(1)}]\cr Seconds to wait before proceeding. This is a safety measure to not accidentally remove your precious files. Set to 0 in non-interactive scripts to disable this precaution.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{character(1)}]: Path of the deleted file directory. } \description{ All files will be erased from the file system, including all results. If you wish to remove only intermediate files, use \code{\link{sweepRegistry}}.
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) removeRegistry(0, tmp) } \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{getDefaultRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{sweepRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/resetJobs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/resetJobs.R \name{resetJobs} \alias{resetJobs} \title{Reset the Computational State of Jobs} \usage{ resetJobs(ids = NULL, reg = getDefaultRegistry()) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to no job. Invalid ids are ignored.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] of job ids which have been reset. See \code{\link{JoinTables}} for examples on working with job tables. } \description{ Resets the computational state of jobs in the \code{\link{Registry}}. This function automatically checks if any of the jobs to reset is either pending or running. However, if the implemented heuristic fails, this can lead to inconsistencies in the data base. Use with care while jobs are running. 
} \seealso{ Other debug: \code{\link{getErrorMessages}()}, \code{\link{getStatus}()}, \code{\link{grepLogs}()}, \code{\link{killJobs}()}, \code{\link{showLog}()}, \code{\link{testJob}()} } \concept{debug} ================================================ FILE: man/runHook.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Hooks.R \name{runHook} \alias{runHook} \alias{Hooks} \alias{Hook} \title{Trigger Evaluation of Custom Function} \usage{ runHook(obj, hook, ...) } \arguments{ \item{obj}{[\link{Registry} | \link{JobCollection}]\cr Registry which contains the \link{ClusterFunctions} with element \dQuote{hooks} or a \link{JobCollection} which holds the subset of functions which are executed remotely.} \item{hook}{[\code{character(1)}]\cr ID of the hook as string.} \item{...}{[ANY]\cr Additional arguments passed to the function referenced by \code{hook}. See description.} } \value{ Return value of the called function, or \code{NULL} if there is no hook with the specified ID. } \description{ Hooks allow triggering function calls on specific events. They can be specified via the \code{\link{ClusterFunctions}} and are triggered on the following events: \describe{ \item{\code{pre.sync}}{\code{function(reg, fns, ...)}: Run before synchronizing the registry on the master. \code{fns} is the character vector of paths to the update files.} \item{\code{post.sync}}{\code{function(reg, updates, ...)}: Run after synchronizing the registry on the master.
\code{updates} is the data.table of processed updates.} \item{\code{pre.submit.job}}{\code{function(reg, ...)}: Run before a job is successfully submitted to the scheduler on the master.} \item{\code{post.submit.job}}{\code{function(reg, ...)}: Run after a job is successfully submitted to the scheduler on the master.} \item{\code{pre.submit}}{\code{function(reg, ...)}: Run before any job is submitted to the scheduler.} \item{\code{post.submit}}{\code{function(reg, ...)}: Run after jobs are submitted to the scheduler.} \item{\code{pre.do.collection}}{\code{function(reg, reader, ...)}: Run before starting the job collection on the slave. \code{reader} is an internal cache object.} \item{\code{post.do.collection}}{\code{function(reg, updates, reader, ...)}: Run after all jobs in the chunk are terminated on the slave. \code{updates} is a \code{\link[data.table]{data.table}} of updates which will be merged with the \code{\link{Registry}} by the master. \code{reader} is an internal cache object.} \item{\code{pre.kill}}{\code{function(reg, ids, ...)}: Run before any job is killed.} \item{\code{post.kill}}{\code{function(reg, ids, ...)}: Run after jobs are killed. \code{ids} is the return value of \code{\link{killJobs}}.} } } ================================================ FILE: man/runOSCommand.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/runOSCommand.R \name{runOSCommand} \alias{runOSCommand} \title{Run OS Commands on Local or Remote Machines} \usage{ runOSCommand( sys.cmd, sys.args = character(0L), stdin = "", nodename = "localhost" ) } \arguments{ \item{sys.cmd}{[\code{character(1)}]\cr Command to run.} \item{sys.args}{[\code{character}]\cr Arguments for \code{sys.cmd}.} \item{stdin}{[\code{character(1)}]\cr Argument passed to \code{\link[base]{system2}}.} \item{nodename}{[\code{character(1)}]\cr Name of the SSH node to run the command on.
If set to \dQuote{localhost} (default), the command is not piped through SSH.} } \value{ [\code{named list}] with \dQuote{sys.cmd}, \dQuote{sys.args}, \dQuote{exit.code} (integer), \dQuote{output} (character). } \description{ This is a helper function to run arbitrary OS commands on local or remote machines. The interface is similar to \code{\link[base]{system2}}, but it always returns the exit status \emph{and} the output. } \examples{ \dontrun{ runOSCommand("ls") runOSCommand("ls", "-al") runOSCommand("notfound") } } \seealso{ Other ClusterFunctionsHelper: \code{\link{cfBrewTemplate}()}, \code{\link{cfHandleUnknownSubmitError}()}, \code{\link{cfKillJob}()}, \code{\link{cfReadBrewTemplate}()}, \code{\link{makeClusterFunctions}()}, \code{\link{makeSubmitJobResult}()} } \concept{ClusterFunctionsHelper} ================================================ FILE: man/saveRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/saveRegistry.R \name{saveRegistry} \alias{saveRegistry} \title{Store the Registry to the File System} \usage{ saveRegistry(reg = getDefaultRegistry()) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{logical(1)}]: \code{TRUE} if the registry was saved, \code{FALSE} otherwise (if the registry is read-only). } \description{ Stores the registry on the file system in its \dQuote{file.dir} (specified for construction in \code{\link{makeRegistry}}, can be accessed via \code{reg$file.dir}). This function is usually called internally whenever needed.
} \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{getDefaultRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{sweepRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/showLog.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Logs.R \name{showLog} \alias{showLog} \alias{getLog} \title{Inspect Log Files} \usage{ showLog(id, reg = getDefaultRegistry()) getLog(id, reg = getDefaultRegistry()) } \arguments{ \item{id}{[\code{integer(1)} or \code{data.table}]\cr Single integer to specify the job or a \code{data.table} with column \code{job.id} and exactly one row.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ Nothing. } \description{ \code{showLog} opens the log in the pager. For customization, see \code{\link[base]{file.show}}. \code{getLog} returns the log as character vector. 
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) # Create some dummy jobs fun = function(i) { if (i == 3) stop(i) if (i \%\% 2 == 1) warning("That's odd.") } ids = batchMap(fun, i = 1:5, reg = tmp) submitJobs(reg = tmp) waitForJobs(reg = tmp) getStatus(reg = tmp) writeLines(getLog(ids[1], reg = tmp)) \dontrun{ showLog(ids[1], reg = tmp) } grepLogs(pattern = "warning", ignore.case = TRUE, reg = tmp) } \seealso{ Other debug: \code{\link{getErrorMessages}()}, \code{\link{getStatus}()}, \code{\link{grepLogs}()}, \code{\link{killJobs}()}, \code{\link{resetJobs}()}, \code{\link{testJob}()} } \concept{debug} ================================================ FILE: man/submitJobs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/submitJobs.R \name{submitJobs} \alias{submitJobs} \title{Submit Jobs to the Batch Systems} \usage{ submitJobs( ids = NULL, resources = list(), sleep = NULL, reg = getDefaultRegistry() ) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findNotSubmitted}}. Invalid ids are ignored.} \item{resources}{[\code{named list}]\cr Computational resources for the jobs to submit. The actual elements of this list (e.g. something like \dQuote{walltime} or \dQuote{nodes}) depend on your template file, exceptions are outlined in the section 'Resources'. Default settings for a system can be set in the configuration file by defining the named list \code{default.resources}. Note that these settings are merged by name, e.g. merging \code{list(walltime = 300)} into \code{list(walltime = 400, memory = 512)} will result in \code{list(walltime = 300, memory = 512)}. 
Same holds for individual job resources passed as additional column of \code{ids} (c.f. section 'Resources').} \item{sleep}{[\code{function(i)} | \code{numeric(1)}]\cr Parameter to control the duration to sleep between temporary errors. You can pass an absolute numeric value in seconds or a \code{function(i)} which returns the number of seconds to sleep in the \code{i}-th iteration between temporary errors. If not provided (\code{NULL}), tries to read the value (number/function) from the configuration file (stored in \code{reg$sleep}) or defaults to a function with exponential backoff between 5 and 120 seconds.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{\link[data.table]{data.table}}] with columns \dQuote{job.id} and \dQuote{chunk}. } \description{ Submits defined jobs to the batch system. After submitting the jobs, you can use \code{\link{waitForJobs}} to wait for the termination of jobs or call \code{\link{reduceResultsList}}/\code{\link{reduceResults}} to collect partial results. The progress can be monitored with \code{\link{getStatus}}. } \note{ If you have a large number of jobs, disabling the progress bar (\code{options(batchtools.progress = FALSE)}) can significantly increase the performance of \code{submitJobs}. } \section{Resources}{ You can pass arbitrary resources to \code{submitJobs()} which then are available in the cluster function template. Some resources' names are standardized and it is good practice to stick to the following nomenclature to avoid confusion: \describe{ \item{walltime:}{Upper time limit in seconds for jobs before they get killed by the scheduler. Can be passed as additional column as part of \code{ids} to set per-job resources.} \item{memory:}{Memory limit in Mb. If jobs exceed this limit, they are usually killed by the scheduler.
Can be passed as additional column as part of \code{ids} to set per-job resources.} \item{ncpus:}{Number of (physical) CPUs to use on the slave. Can be passed as additional column as part of \code{ids} to set per-job resources.} \item{omp.threads:}{Number of threads to use via OpenMP. Used to set environment variable \dQuote{OMP_NUM_THREADS}. Can be passed as additional column as part of \code{ids} to set per-job resources.} \item{pp.size:}{Maximum size of the pointer protection stack, see \code{\link[base]{Memory}}.} \item{blas.threads:}{Number of threads to use for the BLAS backend. Used to set environment variables \dQuote{MKL_NUM_THREADS} and \dQuote{OPENBLAS_NUM_THREADS}. Can be passed as additional column as part of \code{ids} to set per-job resources.} \item{measure.memory:}{Enable memory measurement for jobs. Comes with a small runtime overhead.} \item{chunks.as.arrayjobs:}{Execute chunks as array jobs.} \item{pm.backend:}{Start a \pkg{parallelMap} backend on the slave.} \item{foreach.backend:}{Start a \pkg{foreach} backend on the slave.} \item{clusters:}{Resource used for Slurm to select the set of clusters to run \code{sbatch}/\code{squeue}/\code{scancel} on.} } } \section{Chunking of Jobs}{ Multiple jobs can be grouped (chunked) together to be executed sequentially on the batch system as a single batch job. This is especially useful to avoid overburdening the scheduler by submitting thousands of jobs simultaneously. To chunk jobs together, job ids must be provided as \code{data.frame} with columns \dQuote{job.id} and \dQuote{chunk} (integer). All jobs with the same chunk number will be executed sequentially inside the same batch job. The utility functions \code{\link{chunk}}, \code{\link{binpack}} and \code{\link{lpt}} can assist in grouping jobs. } \section{Array Jobs}{ If your cluster supports array jobs, you can set the resource \code{chunks.as.arrayjobs} to \code{TRUE} in order to execute chunks as job arrays on the cluster.
For each chunk of size \code{n}, \pkg{batchtools} creates a \code{\link{JobCollection}} of (possibly heterogeneous) jobs which is submitted to the scheduler as a single array job with \code{n} repetitions. For each repetition, the \code{JobCollection} is first read from the file system, then subsetted to the \code{i}-th job using the environment variable \code{reg$cluster.functions$array.var} (depending on the cluster backend, defined automatically) and finally executed. } \section{Order of Submission}{ Jobs are submitted in the order of chunks, i.e. jobs which have chunk number \code{sort(unique(ids$chunk))[1]} first, then jobs with chunk number \code{sort(unique(ids$chunk))[2]} and so on. If no chunks are provided, jobs are submitted in the order of \code{ids$job.id}. } \section{Limiting the Number of Jobs}{ If requested, \code{submitJobs} tries to limit the number of concurrent jobs of the user by waiting until jobs terminate before submitting new ones. This can be controlled by setting \dQuote{max.concurrent.jobs} in the configuration file (see \code{\link{Registry}}) or by setting the resource \dQuote{max.concurrent.jobs} to the maximum number of jobs to run simultaneously. If both are set, the setting via the resource takes precedence over the setting in the configuration. } \section{Measuring Memory}{ Setting the resource \code{measure.memory} to \code{TRUE} turns on memory measurement: \code{\link[base]{gc}} is called directly before and after the job and the difference is stored in the internal database. Note that this is just a rough estimate and does neither work reliably for external code like C/C++ nor in combination with threading. } \section{Inner Parallelization}{ Inner parallelization is typically done via threading, sockets or MPI. Two backends are supported to assist in setting up inner parallelization. The first package is \pkg{parallelMap}. 
If you set the resource \dQuote{pm.backend} to \dQuote{multicore}, \dQuote{socket} or \dQuote{mpi}, \code{\link[parallelMap]{parallelStart}} is called on the slave before the first job in the chunk is started and \code{\link[parallelMap]{parallelStop}} is called after the last job terminated. This way, the resources for inner parallelization can be set and get automatically stored just like other computational resources. The function provided by the user just has to call \code{\link[parallelMap]{parallelMap}} to start parallelization using the preconfigured backend. To control the number of CPUs, you have to set the resource \code{ncpus}. Otherwise \code{ncpus} defaults to the number of available CPUs (as reported by \code{\link[parallel]{detectCores}}) on the executing machine for multicore and socket mode and defaults to the return value of \code{\link[Rmpi]{mpi.universe.size}-1} for MPI. Your template must be set up to handle the parallelization, e.g. request the right number of CPUs or start R with \code{mpirun}. You may pass further options like \code{level} to \code{\link[parallelMap]{parallelStart}} via the named list \dQuote{pm.opts}. The second supported parallelization backend is \pkg{foreach}. If you set the resource \dQuote{foreach.backend} to \dQuote{seq} (sequential mode), \dQuote{parallel} (\pkg{doParallel}) or \dQuote{mpi} (\pkg{doMPI}), the requested \pkg{foreach} backend is automatically registered on the slave. Again, the resource \code{ncpus} is used to determine the number of CPUs. Neither the namespace of \pkg{parallelMap} nor the namespace of \pkg{foreach} is attached. You have to do this manually via \code{\link[base]{library}} or let the registry load the packages for you.
} \examples{ \dontshow{ batchtools:::example_push_temp(3) } ### Example 1: Submit subsets of jobs tmp = makeRegistry(file.dir = NA, make.default = FALSE) # toy function which fails if x is even and an input file does not exist fun = function(x, fn) if (x \%\% 2 == 0 && !file.exists(fn)) stop("file not found") else x # define jobs via batchMap fn = tempfile() ids = batchMap(fun, 1:20, reg = tmp, fn = fn) # submit some jobs ids = 1:10 submitJobs(ids, reg = tmp) waitForJobs(ids, reg = tmp) getStatus(reg = tmp) # create the required file and re-submit failed jobs file.create(fn) submitJobs(findErrors(ids, reg = tmp), reg = tmp) getStatus(reg = tmp) # submit remaining jobs which have not yet been submitted ids = findNotSubmitted(reg = tmp) submitJobs(ids, reg = tmp) getStatus(reg = tmp) # collect results reduceResultsList(reg = tmp) ### Example 2: Using memory measurement tmp = makeRegistry(file.dir = NA, make.default = FALSE) # Toy function which creates a large matrix and returns the column sums fun = function(n, p) colMeans(matrix(runif(n*p), n, p)) # Arguments to fun: args = data.table::CJ(n = c(1e4, 1e5), p = c(10, 50)) # like expand.grid() print(args) # Map function to create jobs ids = batchMap(fun, args = args, reg = tmp) # Set resources: enable memory measurement res = list(measure.memory = TRUE) # Submit jobs using the currently configured cluster functions submitJobs(ids, resources = res, reg = tmp) # Retrieve information about memory, combine with parameters info = ijoin(getJobStatus(reg = tmp)[, .(job.id, mem.used)], getJobPars(reg = tmp)) print(unwrap(info)) # Combine job info with results -> each job is aggregated using mean() unwrap(ijoin(info, reduceResultsDataTable(fun = function(res) list(res = mean(res)), reg = tmp))) ### Example 3: Multicore execution on the slave tmp = makeRegistry(file.dir = NA, make.default = FALSE) # Function which sleeps 10 seconds, i-times f = function(i) { parallelMap::parallelMap(Sys.sleep, rep(10, i)) } # Create one job
with parameter i=4 ids = batchMap(f, i = 4, reg = tmp) # Set resources: Use parallelMap in multicore mode with 4 CPUs # batchtools internally loads the namespace of parallelMap and then # calls parallelStart() before the job and parallelStop() right # after the last job in the chunk terminated. res = list(pm.backend = "multicore", ncpus = 4) \dontrun{ # Submit the job and wait for it submitJobs(resources = res, reg = tmp) waitForJobs(reg = tmp) # If successful, the running time should be ~10s getJobTable(reg = tmp)[, .(job.id, time.running)] # There should also be a note in the log: grepLogs(pattern = "parallelMap", reg = tmp) } } ================================================ FILE: man/summarizeExperiments.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarizeExperiments.R \name{summarizeExperiments} \alias{summarizeExperiments} \title{Quick Summary over Experiments} \usage{ summarizeExperiments( ids = NULL, by = c("problem", "algorithm"), reg = getDefaultRegistry() ) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to all jobs. Invalid ids are ignored.} \item{by}{[\code{character}]\cr Split the resulting table by columns of \code{\link{getJobPars}}.} \item{reg}{[\code{\link{ExperimentRegistry}}]\cr Registry. If not explicitly passed, uses the last created registry.} } \value{ [\code{\link[data.table]{data.table}}] of frequencies. } \description{ Returns a frequency table of defined experiments. See \code{\link{ExperimentRegistry}} for an example.
} \seealso{ Other Experiment: \code{\link{addExperiments}()}, \code{\link{removeExperiments}()} } \concept{Experiment} ================================================ FILE: man/sweepRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/sweepRegistry.R \name{sweepRegistry} \alias{sweepRegistry} \title{Check Consistency and Remove Obsolete Information} \usage{ sweepRegistry(reg = getDefaultRegistry()) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \description{ Canceled jobs and jobs submitted multiple times may leave stray files behind. This function checks the registry for consistency and removes obsolete files and redundant data base entries. } \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{getDefaultRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{syncRegistry}()} } \concept{Registry} ================================================ FILE: man/syncRegistry.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/syncRegistry.R \name{syncRegistry} \alias{syncRegistry} \title{Synchronize the Registry} \usage{ syncRegistry(reg = getDefaultRegistry()) } \arguments{ \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{logical(1)}]: \code{TRUE} if the state has changed, \code{FALSE} otherwise. } \description{ Parses update files written by the slaves to the file system and updates the internal data base. 
} \seealso{ Other Registry: \code{\link{clearRegistry}()}, \code{\link{getDefaultRegistry}()}, \code{\link{loadRegistry}()}, \code{\link{makeRegistry}()}, \code{\link{removeRegistry}()}, \code{\link{saveRegistry}()}, \code{\link{sweepRegistry}()} } \concept{Registry} ================================================ FILE: man/testJob.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/testJob.R \name{testJob} \alias{testJob} \title{Run Jobs Interactively} \usage{ testJob(id, external = FALSE, reg = getDefaultRegistry()) } \arguments{ \item{id}{[\code{integer(1)} or \code{data.table}]\cr Single integer to specify the job or a \code{data.table} with column \code{job.id} and exactly one row.} \item{external}{[\code{logical(1)}]\cr Run the job in an external R session? If \code{TRUE}, starts a fresh R session on the local machine to execute the job with \code{\link{execJob}}. You will not be able to use debug tools like \code{\link[base]{traceback}} or \code{\link[base]{browser}}. If \code{external} is set to \code{FALSE} (default) on the other hand, \code{testJob} will execute the job in the current R session and the usual debugging tools work. However, spotting missing variable declarations (as they are possibly resolved in the global environment) is impossible. Same holds for missing package dependency declarations.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ Returns the result of the job if successful. } \description{ Starts a single job on the local machine.
} \examples{ \dontshow{ batchtools:::example_push_temp(1) } tmp = makeRegistry(file.dir = NA, make.default = FALSE) batchMap(function(x) if (x == 2) xxx else x, 1:2, reg = tmp) testJob(1, reg = tmp) \dontrun{ testJob(2, reg = tmp) } } \seealso{ Other debug: \code{\link{getErrorMessages}()}, \code{\link{getStatus}()}, \code{\link{grepLogs}()}, \code{\link{killJobs}()}, \code{\link{resetJobs}()}, \code{\link{showLog}()} } \concept{debug} ================================================ FILE: man/unwrap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/unwrap.R \name{unwrap} \alias{unwrap} \alias{flatten} \title{Unwrap Nested Data Frames} \usage{ unwrap(x, cols = NULL, sep = NULL) flatten(x, cols = NULL, sep = NULL) } \arguments{ \item{x}{[\code{\link{data.frame}} | \code{\link[data.table]{data.table}}]\cr Data frame to flatten.} \item{cols}{[\code{character}]\cr Columns to consider for this operation. If set to \code{NULL} (default), will operate on all columns of type \dQuote{list}.} \item{sep}{[\code{character(1)}]\cr If \code{NULL} (default), the column names of the additional columns will re-use the names of the nested \code{list}/\code{data.frame}. This may lead to name clashes. If you provide \code{sep}, the variable column name will be constructed as \dQuote{[column name of x][sep][inner name]}.} } \value{ [\code{\link[data.table]{data.table}}]. } \description{ Some functions (e.g., \code{\link{getJobPars}}, \code{\link{getJobResources}} or \code{\link{reduceResultsDataTable}}) return a \code{data.table} with columns of type \code{list}. These columns can be unnested/unwrapped with this function. The contents of these columns will be transformed to a \code{data.table} and \code{\link[base]{cbind}}-ed to the input data.frame \code{x}, replacing the original nested column. } \note{ There is a name clash with function \code{flatten} in package \pkg{purrr}.
The function \code{flatten} is discouraged to use for this reason in favor of \code{unwrap}. } \examples{ x = data.table::data.table( id = 1:3, values = list(list(a = 1, b = 3), list(a = 2, b = 2), list(a = 3)) ) unwrap(x) unwrap(x, sep = ".") } ================================================ FILE: man/waitForJobs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/waitForJobs.R \name{waitForJobs} \alias{waitForJobs} \title{Wait for Termination of Jobs} \usage{ waitForJobs( ids = NULL, sleep = NULL, timeout = 604800, expire.after = NULL, stop.on.error = FALSE, stop.on.expire = FALSE, reg = getDefaultRegistry() ) } \arguments{ \item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) with a column named \dQuote{job.id}. Alternatively, you may also pass a vector of integerish job ids. If not set, defaults to the return value of \code{\link{findSubmitted}}. Invalid ids are ignored.} \item{sleep}{[\code{function(i)} | \code{numeric(1)}]\cr Parameter to control the duration to sleep between queries. You can pass an absolute numeric value in seconds or a \code{function(i)} which returns the number of seconds to sleep in the \code{i}-th iteration. If not provided (\code{NULL}), tries to read the value (number/function) from the configuration file (stored in \code{reg$sleep}) or defaults to a function with exponential backoff between 5 and 120 seconds.} \item{timeout}{[\code{numeric(1)}]\cr After waiting \code{timeout} seconds, show a message and return \code{FALSE}. This argument may be required on some systems where, e.g., expired jobs or jobs on hold are problematic to detect. If you don't want a timeout, set this to \code{Inf}. 
Default is \code{604800} (one week).} \item{expire.after}{[\code{integer(1)}]\cr Jobs count as \dQuote{expired} if they are not found on the system but have not communicated back their results (or error message). This frequently happens on managed systems if the scheduler kills a job because the job has hit the walltime or requested more memory than reserved. On the other hand, network file systems often require several seconds for new files to be found, which can lead to false positives in the detection heuristic. \code{waitForJobs} treats such jobs as expired after they have not been detected on the system for \code{expire.after} iterations. If not provided (\code{NULL}), tries to read the value from the configuration file (stored in \code{reg$expire.after}), and finally defaults to \code{3}.} \item{stop.on.error}{[\code{logical(1)}]\cr Immediately cancel if a job terminates with an error? Default is \code{FALSE}.} \item{stop.on.expire}{[\code{logical(1)}]\cr Immediately cancel if jobs are detected to be expired? Default is \code{FALSE}. Expired jobs will then be ignored for the remainder of \code{waitForJobs()}.} \item{reg}{[\code{\link{Registry}}]\cr Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).} } \value{ [\code{logical(1)}]. Returns \code{TRUE} if all jobs terminated successfully and \code{FALSE} if either the timeout is reached or at least one job terminated with an exception or expired. } \description{ This function simply waits until all jobs are terminated. } ================================================ FILE: man-roxygen/expreg.R ================================================ #' @param reg [\code{\link{ExperimentRegistry}}]\cr #' Registry. If not explicitly passed, uses the last created registry.
================================================ FILE: man-roxygen/id.R ================================================ #' @param id [\code{integer(1)} or \code{data.table}]\cr #' Single integer to specify the job or a \code{data.table} with column \code{job.id} #' and exactly one row. ================================================ FILE: man-roxygen/ids.R ================================================ #' @param ids [\code{\link[base]{data.frame}} or \code{integer}]\cr #' A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}}) #' with a column named \dQuote{job.id}. #' Alternatively, you may also pass a vector of integerish job ids. #' If not set, defaults to <%= switch(ids.default, "all" = "all jobs", "none" = "no job", sprintf("the return value of \\code{\\link{%s}}", ids.default)) %>. #' Invalid ids are ignored. ================================================ FILE: man-roxygen/missing.val.R ================================================ #' @param missing.val [\code{ANY}]\cr #' Value to impute as result for a job which is not finished. #' If not provided and a result is missing, an exception is raised. ================================================ FILE: man-roxygen/more.args.R ================================================ #' @param more.args [\code{list}]\cr #' A list of further arguments passed to \code{fun}. #' Default is an empty list. ================================================ FILE: man-roxygen/ncpus.R ================================================ #' @param ncpus [\code{integer(1)}]\cr #' Number of CPUs. #' Default is to use all logical cores. The total number of cores "available" can be set via the option \code{mc.cores} #' and defaults to the heuristic implemented in \code{\link[parallel]{detectCores}}. ================================================ FILE: man-roxygen/nodename.R ================================================ #' @param nodename [\code{character(1)}]\cr #' Nodename of the master host. 
All commands are sent via SSH to this host. Only works iff #' \enumerate{ #' \item{Passwordless authentication (e.g., via SSH public key authentication) is set up.} #' \item{The file directory is shared across machines, e.g. mounted via SSHFS.} #' \item{Either the absolute path to the \code{file.dir} is identical on the machines, or paths are provided relative to the home directory. Symbolic links should work.} #' } ================================================ FILE: man-roxygen/reg.R ================================================ #' @param reg [\code{\link{Registry}}]\cr #' Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}). ================================================ FILE: man-roxygen/template.R ================================================ #' @param template [\code{character(1)}]\cr #' Either a path to a \pkg{brew} template file (with extension \dQuote{tmpl}), or a short descriptive name enabling the following heuristic for the file lookup: #' \enumerate{ #' \item \dQuote{batchtools.[template].tmpl} in the path specified by the environment variable \dQuote{R_BATCHTOOLS_SEARCH_PATH}. #' \item \dQuote{batchtools.[template].tmpl} in the current working directory. #' \item \dQuote{[template].tmpl} in the user config directory (see \code{\link[rappdirs]{user_config_dir}}); on Linux this is usually \dQuote{~/.config/batchtools/[template].tmpl}. #' \item \dQuote{.batchtools.[template].tmpl} in the home directory. #' \item \dQuote{[template].tmpl} in the package installation directory in the subfolder \dQuote{templates}.
#' } ================================================ FILE: paper/codemeta.json ================================================ { "@context": "https://raw.githubusercontent.com/mbjones/codemeta/master/codemeta.jsonld", "@type": "Code", "author": [ { "@id": "https://orcid.org/0000-0001-9754-0393", "@type": "Person", "email": "lang@statistik.tu-dortmund.de", "name": "Michel Lang", "affiliation": "TU Dortmund University" }, { "@id": "https://orcid.org/0000-0001-6002-6980", "@type": "Person", "email": "bernd.bischl@stat.uni-muenchen.de", "name": "Bernd Bischl", "affiliation": "LMU Munich" }, { "@id": "https://orcid.org/0000-0003-0873-137X", "@type": "Person", "email": "surmann@statistik.tu-dortmund.de", "name": "Dirk Surmann", "affiliation": "TU Dortmund University" } ], "identifier": "https://doi.org/10.5281/zenodo.165184", "codeRepository": "https://github.com/mllg/batchtools", "datePublished": "2016-11-10", "dateModified": "2016-11-10", "dateCreated": "2016-11-10", "description": "Tools for computation on batch systems", "keywords": "r, parallelization, high-performance computing, batch systems", "license": "GPL v3.0", "title": "batchtools", "version": "v0.9.0" } ================================================ FILE: paper/paper.bib ================================================ @Article{batchjobs_2015, title = {{BatchJobs} and {BatchExperiments}: Abstraction Mechanisms for Using {R} in Batch Environments}, author = {Bernd Bischl and Michel Lang and Olaf Mersmann and J{\"o}rg Rahnenf{\"u}hrer and Claus Weihs}, journal = {Journal of Statistical Software}, year = {2015}, volume = {64}, number = {11}, pages = {1--25}, url = {https://www.jstatsoft.org/v64/i11/}, doi = {10.18637/jss.v064.i11}, } @Manual{R, title = {R: A Language and Environment for Statistical Computing}, author = {{R Core Team}}, organization = {R Foundation for Statistical Computing}, address = {Vienna, Austria}, year = {2016}, url = {https://www.R-project.org/}, } @Manual{snow, title = {snow: 
Simple Network of Workstations}, author = {Luke Tierney and A. J. Rossini and Na Li and H. Sevcikova}, year = {2016}, note = {R package version 0.4-2}, url = {https://CRAN.R-project.org/package=snow}, } @Manual{data_table, title = {data.table: Extension of Data.frame}, author = {M Dowle and A Srinivasan and T Short and S Lianoglou with contributions from R Saporta and E Antonyan}, year = {2015}, note = {R package version 1.9.6}, url = {https://CRAN.R-project.org/package=data.table}, } ================================================ FILE: paper/paper.md ================================================ --- title: 'batchtools: Tools for R to work on batch systems' tags: - R - high-performance computing - batch systems - parallelization authors: - name: Michel Lang orcid: 0000-0001-9754-0393 affiliation: 1 - name: Bernd Bischl orcid: 0000-0001-6002-6980 affiliation: 2 - name: Dirk Surmann orcid: 0000-0003-0873-137X affiliation: 1 affiliations: - name: TU Dortmund University index: 1 - name: LMU Munich index: 2 date: 9 November 2016 bibliography: paper.bib --- # Summary The [`R`](https://www.r-project.org/) [@R] package [`batchtools`](https://github.com/mllg/batchtools) is the successor of the [`BatchJobs`](https://github.com/tudo-r/BatchJobs) package [@batchjobs_2015]. It provides an implementation of a Map-like operation to define and asynchronously execute jobs on a variety of parallel backends: * Local (blocking) execution in the current `R` session or in an externally spawned `R` process (intended for debugging and prototyping) * Local (non-blocking) parallel execution using `parallel`'s multicore backend [@R] or [`snow`](https://cran.r-project.org/package=snow)'s socket mode [@snow]. * Execution on loosely connected machines using SSH (including basic resource usage control). 
* [Docker Swarm](https://docs.docker.com/engine/swarm/) * [IBM Spectrum LSF](https://www.ibm.com/products/hpc-workload-management) * [OpenLava](https://www.openlava.org/) * [Univa Grid Engine](https://www.univa.com/) (formerly Oracle Grid Engine and Sun Grid Engine) * [Slurm Workload Manager](https://slurm.schedmd.com/) * [TORQUE/PBS Resource Manager](https://adaptivecomputing.com/cherry-services/moab-hpc/) Extensibility and user customization are important features as configuration on high-performance computing clusters is often heavily tailored towards very specific requirements or special hardware. Hence, the interaction with the schedulers uses a template engine for improved flexibility. Furthermore, custom functions can be hooked into the package to be called at certain events. As a last resort, many utility functions simplify the implementation of a custom cluster backend from scratch. The communication between the master `R` session and the computational nodes is kept as simple as possible and runs completely on the file system which greatly simplifies the extension to additional parallel platforms. The [`data.table`](https://github.com/Rdatatable/data.table) package [@data_table] acts as an in-memory database to keep track of the computational status of all jobs. Unique job seeds ensure reproducibility across systems, log files can conveniently be searched using regular expressions and jobs can be annotated with arbitrary tags. Jobs can be chunked (i.e., merged into one technical cluster job) to be executed as one virtual job on a node (executed sequentially or using multiple local CPUs) in order to reduce the overhead induced by job management and starting/stopping `R`. All in all, the provided tools allow users to work with many thousands or even millions of jobs in an organized and efficient manner.
The `batchtools` package also comes with an abstraction mechanism to assist in conducting large-scale computer experiments, especially suited for (but not restricted to) benchmarking and exploration of algorithm performance. The mechanism is similar to [`BatchExperiments`](https://github.com/tudo-r/BatchExperiments) [@batchjobs_2015] which `batchtools` now also supersedes: After defining the building blocks of most computer experiments, problems and algorithms, both can be parametrized to define jobs which are then in a second step submitted to one of the parallel backends. Important changes to its predecessors are summarized in a vignette to help users of [`BatchJobs`](https://github.com/tudo-r/BatchJobs)/[`BatchExperiments`](https://github.com/tudo-r/BatchExperiments) migrating their cluster configuration and aid the transition to `batchtools`. # References ================================================ FILE: src/Makevars ================================================ PKG_CFLAGS=${R_DEBUG_FLAGS} ================================================ FILE: src/binpack.c ================================================ #include #include #include #include
/*
 * First-fit bin packing.
 *
 * x_        numeric vector of item sizes.
 * order_    integer vector of 1-based indices into x_ giving the order in
 *           which items are placed. The item at order_[0] is treated as the
 *           largest one (presumably the caller sorts decreasingly -- confirm
 *           against the R wrapper).
 * capacity_ numeric scalar, capacity of every bin.
 *
 * Returns an integer vector of length(x_) holding the 1-based bin number
 * assigned to each item. Raises an R error if the first item in order_
 * exceeds the capacity. An empty x_ yields an empty integer vector.
 */
SEXP attribute_hidden c_binpack(SEXP x_, SEXP order_, SEXP capacity_) {
    const double * x = REAL(x_);
    const R_len_t n = length(x_);
    const int * order = INTEGER(order_);
    const double capacity = REAL(capacity_)[0];

    /* Nothing to pack: bail out before order[0] is read out of bounds. */
    if (n == 0)
        return allocVector(INTSXP, 0);

    R_len_t ii = order[0] - 1;
    if (x[ii] > capacity)
        error("Capacity not sufficient. Largest item does not fit.");

    SEXP res = PROTECT(allocVector(INTSXP, n));
    int * bin = INTEGER(res);

    /* Transient storage managed by R: allocation failure raises an R error
     * instead of returning NULL, and the memory is reclaimed automatically
     * when the .Call returns. At most n bins can ever be opened. */
    double * capacities = (double *) R_alloc(n, sizeof(double));
    R_len_t bins = 1;

    /* The first item always opens the first bin. */
    bin[ii] = 1;
    capacities[0] = capacity - x[ii];

    for (R_len_t i = 1; i < n; i++) {
        ii = order[i] - 1;
        bool packed = false;

        /* First fit: take the first open bin with enough remaining room. */
        for (R_len_t pos = 0; pos < bins; pos++) {
            if (capacities[pos] >= x[ii]) {
                packed = true;
                bin[ii] = pos + 1;
                capacities[pos] -= x[ii];
                break;
            }
        }

        /* No open bin can take the item: open a new one. */
        if (!packed) {
            capacities[bins] = capacity - x[ii];
            bins++;
            bin[ii] = bins;
        }
    }

    UNPROTECT(1);
    return res;
}
================================================ FILE: src/count_not_missing.c ================================================ #include #include #include static R_len_t count_not_missing_logical(SEXP x) { const int * xp = LOGICAL(x); const int * const xe = xp + length(x); R_len_t count = 0; for (; xp != xe; xp++) { if (*xp != NA_LOGICAL) count++; } return count; } static R_len_t count_not_missing_integer(SEXP x) { const int * xp = INTEGER(x); const int * const xe = xp + length(x); R_len_t count = 0; for (; xp != xe; xp++) { if (*xp != NA_INTEGER) count++; } return count; } static R_len_t count_not_missing_double(SEXP x) { const double * xp = REAL(x); const double * const xe = xp + length(x); R_len_t count = 0; for (; xp != xe; xp++) { if (!ISNAN(*xp)) count++; } return count; } static R_len_t count_not_missing_string(SEXP x) { const R_len_t nx = length(x); R_len_t count = 0; for (R_len_t i = 0; i < nx; i++) { if (STRING_ELT(x, i) != NA_STRING) count++; } return count; } static R_len_t count_not_missing_list(SEXP x) { const R_len_t nx = length(x); R_len_t count = 0; for (R_len_t i = 0; i < nx; i++) { if (!isNull(VECTOR_ELT(x, i))) count++; } return count; } SEXP attribute_hidden count_not_missing(SEXP x) { switch(TYPEOF(x)) { case LGLSXP: return ScalarInteger(count_not_missing_logical(x)); case INTSXP: return ScalarInteger(count_not_missing_integer(x)); case REALSXP: return
ScalarInteger(count_not_missing_double(x)); case STRSXP: return ScalarInteger(count_not_missing_string(x)); case VECSXP: return ScalarInteger(count_not_missing_list(x)); case NILSXP: return ScalarInteger(0); default: error("Object of type '%s' not supported", type2char(TYPEOF(x))); } } ================================================ FILE: src/fill_gaps.c ================================================ #include #include #include /* similar to last observation carried forward, but resets to NA if the last observation is spotted again */ /* used in log file reading: jobs have a start and stop marker, the lines in between belong to the job */ SEXP attribute_hidden fill_gaps(SEXP x) { const R_len_t n = length(x); int last = NA_INTEGER; const int *xi = INTEGER(x); const int * const xend = xi + n; SEXP y = PROTECT(allocVector(INTSXP, n)); int *yi = INTEGER(y); for(; xi != xend; xi++, yi++) { if (*xi == NA_INTEGER) { *yi = last; } else { *yi = *xi; last = (*xi == last) ? NA_INTEGER : *xi; } } UNPROTECT(1); return y; } ================================================ FILE: src/init.c ================================================ #include #include #include // for NULL #include /* .Call calls */ extern SEXP c_binpack(SEXP, SEXP, SEXP); extern SEXP c_lpt(SEXP, SEXP, SEXP); extern SEXP count_not_missing(SEXP); extern SEXP fill_gaps(SEXP); static const R_CallMethodDef CallEntries[] = { {"c_binpack", (DL_FUNC) &c_binpack, 3}, {"c_lpt", (DL_FUNC) &c_lpt, 3}, {"count_not_missing", (DL_FUNC) &count_not_missing, 1}, {"fill_gaps", (DL_FUNC) &fill_gaps, 1}, {NULL, NULL, 0} }; void R_init_batchtools(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } ================================================ FILE: src/lpt.c ================================================ #include #include #include #define min(a, b) (((a) < (b)) ? 
(a) : (b)) SEXP attribute_hidden c_lpt(SEXP x_, SEXP order_, SEXP chunks_) { const double * x = REAL(x_); const R_len_t n = length(x_); const int * order = INTEGER(order_); const int chunks = min(INTEGER(chunks_)[0], n); SEXP res = PROTECT(allocVector(INTSXP, n)); int * bin = INTEGER(res); double * sums = malloc(chunks * sizeof(double)); for (R_len_t i = 0; i < chunks; i++) { R_len_t ii = order[i] - 1; bin[ii] = i + 1; sums[i] = x[ii]; } for (R_len_t i = chunks; i < n; i++) { R_len_t ii = order[i] - 1; R_len_t pos = 0; for (R_len_t j = 1; j < chunks; j++) { if (sums[j] < sums[pos]) pos = j; } bin[ii] = pos + 1; sums[pos] += x[ii]; } free(sums); UNPROTECT(1); return res; } ================================================ FILE: tests/testthat/helper.R ================================================ library("testthat") library("data.table") library("checkmate") library("stringi") requireNamespace("withr") options(datatable.rbindlist.check="error") is_on_ci = function() { identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("TRAVIS"), "true") } getSysConf = function() { conf.file = findConfFile() if (!checkmate::testScalarNA(conf.file)) { ee = new.env() sys.source(conf.file, envir = ee) as.list(ee) } else { list() } } makeTestRegistry = function(file.dir = NA, make.default = FALSE, ...) { reg = makeRegistry(file.dir = file.dir, make.default = make.default, ...) # cleanup registry directories if not a subdirectory of R's temp dir if ((is.na(file.dir) && !identical(reg$temp.dir, fs::path_temp()))) reg.finalizer(e = reg, f = function(reg) if (fs::dir_exists(reg$file.dir)) fs::dir_delete(reg$file.dir), onexit = TRUE) return(reg) } makeTestExperimentRegistry = function(file.dir = NA, make.default = FALSE, ...) { reg = makeExperimentRegistry(file.dir = file.dir, make.default = make.default, ...) 
# cleanup registry directories if not a subdirectory of R's temp dir if ((is.na(file.dir) && !identical(reg$temp.dir, fs::path_temp()))) reg.finalizer(e = reg, f = function(reg) if (fs::dir_exists(reg$file.dir)) fs::dir_delete(reg$file.dir), onexit = TRUE) return(reg) } silent = function(expr) { withr::with_options(list(batchtools.progress = FALSE, batchtools.verbose = FALSE), expr) } s.chunk = function(ids) { ids$chunk = 1L ids } submitAndWait = function(reg, ids = NULL, ..., sleep = 1) { ids = if (is.null(ids)) findNotSubmitted(reg = reg) else convertIds(reg, ids, keep.extra = names(ids)) if ("chunk" %chnin% names(ids)) ids = s.chunk(ids) silent({ ids = submitJobs(ids = ids, ..., reg = reg) waitForJobs(ids, expire.after = 10L, reg = reg, sleep = sleep) }) } suppressAll = function (expr) { silent(capture.output({z = suppressWarnings(suppressMessages(suppressPackageStartupMessages(force(expr))))})) invisible(z) } checkTables = function(reg, ...) { checkmate::expect_string(reg$hash) checkmate::expect_posixct(reg$mtime, len = 1L) if (class(reg)[1L] == "Registry") { cols = c("def.id", "job.pars") types = c("integer", "list") } else { cols = c("def.id", "problem", "prob.pars", "algorithm", "algo.pars", "pars.hash") types = c("integer", "character", "list", "character", "list", "character") } expect_is(reg$defs, "data.table") checkmate::expect_data_table(reg$defs, ncols = length(cols), ...) 
checkmate::expect_set_equal(colnames(reg$defs), cols) expect_equal(as.character(reg$defs[, lapply(.SD, class), .SDcols = cols]), types) expect_equal(key(reg$defs), "def.id") expect_equal(anyDuplicated(reg$defs, by = "def.id"), 0L) if (class(reg)[1L] == "Registry") { cols = c("job.id", "def.id", "submitted", "started", "done", "error", "mem.used", "resource.id", "batch.id", "log.file", "job.hash", "job.name") types = c("integer", "integer", "numeric", "numeric", "numeric", "character", "numeric", "integer", "character", "character", "character", "character") } else { cols = c("job.id", "def.id", "submitted", "started", "done", "error", "mem.used", "resource.id", "batch.id", "log.file", "job.hash", "job.name", "repl") types = c("integer", "integer", "numeric", "numeric", "numeric", "character", "numeric", "integer", "character", "character", "character", "character", "integer") } expect_is(reg$status, "data.table") checkmate::expect_data_table(reg$status, ncols = length(cols), ...) checkmate::expect_set_equal(colnames(reg$status), cols) expect_equal(as.character(reg$status[, lapply(.SD, class), .SDcols = cols]), types) expect_equal(key(reg$status), "job.id") expect_equal(anyDuplicated(reg$status, by = "job.id"), 0L) checkStatusIntegrity(reg) cols = c("resource.id", "resource.hash", "resources") types = c("integer", "character", "list") checkmate::expect_data_table(reg$resources, ncols = length(cols), ...) checkmate::expect_set_equal(colnames(reg$resources), cols) expect_equal(as.character(reg$resources[, lapply(.SD, class), .SDcols = cols]), types) expect_equal(key(reg$resources), "resource.id") expect_equal(anyDuplicated(reg$resources, by = "resource.id"), 0L) cols = c("job.id", "tag") types = c("integer", "character") checkmate::expect_data_table(reg$tags, ncols = length(cols), ...) 
checkmate::expect_set_equal(colnames(reg$tags), cols) expect_equal(as.character(reg$tags[, lapply(.SD, class), .SDcols = cols]), types) expect_equal(key(reg$tags), "job.id") if (class(reg)[1L] == "ExperimentRegistry") { checkmate::expect_character(reg$problems, any.missing = FALSE, unique = TRUE) checkmate::expect_character(reg$algorithms, any.missing = FALSE, unique = TRUE) checkmate::expect_integer(reg$status$repl, lower = 1L, any.missing = FALSE) checkmate::expect_subset(reg$defs$problem, reg$problems) checkmate::expect_subset(reg$defs$algorithm, reg$algorithms) } expect_key_set_equal(reg$defs, reg$status, by = "def.id") expect_key_set_equal(reg$status[!is.na(resource.id)], reg$resources, by = "resource.id") if (nrow(reg$status) > 0L) checkmate::expect_data_table(ajoin(reg$tags, reg$status, by = "job.id"), nrow = 0) else expect_equal(nrow(reg$tags), 0) } checkStatusIntegrity = function(reg) { tab = reg$status[, list(job.id, code = (!is.na(submitted)) + 2L * (!is.na(started)) + 4L * (!is.na(done)) + 8L * (!is.na(error)))] # submitted started done error # 2^0 2^1 2^2 2^3 # 1 2 4 8 # ------------------------------------------------------ # 0 0 0 0 -> 0 (unsubmitted) # 1 0 0 0 -> 1 (submitted) # 1 1 0 0 -> 3 (started) # 1 1 1 0 -> 7 (done) # 1 1 1 1 -> 15 (error) checkmate::expect_subset(tab$code, c(0L, 1L, 3L, 7L, 15L), info = "Status Integrity") } expect_copied = function(x, y) { expect_false(data.table:::address(x) == data.table:::address(y)) } expect_key_set_equal = function(x, y, by = NULL) { expect_true(nrow(ajoin(x, y, by = by)) == 0 && nrow(ajoin(y, x, by = by)) == 0) } ================================================ FILE: tests/testthat/test_Algorithm.R ================================================ test_that("addAlgorithm", { reg = makeTestExperimentRegistry() algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) 
NULL) expect_is(algo, "Algorithm") expect_equal(algo$name, "a1") expect_function(algo$fun) expect_file_exists(getAlgorithmURI(reg, algo$name)) prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data) nrow(data)) algo = addAlgorithm(reg = reg, "a2", fun = function(...) NULL) ids = addExperiments(list(p1 = data.table()), algo.designs = list(a1 = data.table(), a2 = data.table()), repls = 2, reg = reg) expect_integer(ids$job.id, len = 4L) removeAlgorithms(reg = reg, "a1") expect_integer(reg$status$job.id, len = 2L) expect_set_equal(reg$algorithms, "a2") expect_set_equal(reg$algorithms, "a2") expect_false(fs::file_exists(getAlgorithmURI(reg, "a1"))) expect_true(fs::file_exists(getAlgorithmURI(reg, "a2"))) expect_set_equal(getJobPars(reg = reg)$algorithm, "a2") checkTables(reg) }) test_that("addAlgorithm overwrites old algo", { reg = makeTestExperimentRegistry() prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data) 2) algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) instance * 2) ids = addExperiments(list(p1 = data.table()), list(a1 = data.table()), reg = reg) run = function(id) suppressAll(execJob(makeJob(id, reg = reg))) expect_equal(run(1), 4) prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data) 4) expect_equal(run(1), 8) algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) 
instance * 8) expect_equal(run(1), 32) }) ================================================ FILE: tests/testthat/test_ClusterFunctionHyperQueue.R ================================================ test_that("clusterFunctionsHyperQueue", { skip_if(TRUE) skip_on_ci() skip_on_cran() reg = makeTestRegistry() reg$cluster.functions = makeClusterFunctionsHyperQueue() saveRegistry(reg) fun = function(x) { Sys.sleep(5) TRUE } ids = batchMap(fun, x = c(5, 5), reg = reg) submitJobs(1:2, reg = reg) waitForJobs(ids = ids, reg = reg) expect_data_table(findJobs(ids = ids, reg = reg), nrow = 2) expect_data_table(findRunning(reg = reg), nrow = 0L) }) test_that("clusterFunctionsHyperQueue: killJob", { skip_if(TRUE) skip_on_ci() skip_on_cran() reg = makeTestRegistry() reg$cluster.functions = makeClusterFunctionsHyperQueue() saveRegistry(reg) fun = function(x) { Sys.sleep(5); TRUE } ids = batchMap(fun, x = c(5, 5), reg = reg) submitJobs(1:2, reg = reg) Sys.sleep(1) expect_data_table(killJobs(1, reg = reg), nrow = 1) }) test_that("clusterFunctionsHyperQueue with resources", { skip_if(TRUE) skip_on_ci() skip_on_cran() reg = makeTestRegistry() reg$cluster.functions = makeClusterFunctionsHyperQueue() saveRegistry(reg) fun = function(x) { Sys.sleep(5) TRUE } ids = batchMap(fun, x = c(5, 5), reg = reg) submitJobs(1:2, reg = reg, resources = list(ncpus = 2, walltime = 10, memory = 5)) waitForJobs(ids = ids, reg = reg) expect_data_table(findJobs(ids = ids, reg = reg), nrow = 2) expect_data_table(findRunning(reg = reg), nrow = 0L) }) ================================================ FILE: tests/testthat/test_ClusterFunctions.R ================================================ test_that("clusterFunctions constructor", { check = function(cf) { expect_is(cf, "ClusterFunctions") expect_set_equal(names(cf), c("name", "submitJob", "killJob", "listJobsQueued", "listJobsRunning", "store.job.collection", "store.job.files", "array.var", "scheduler.latency", "fs.latency", "hooks")) expect_output(print(cf), 
"ClusterFunctions for mode") } reg = makeTestRegistry() check(reg$cluster.functions) fn = fs::path(fs::path_temp(), "dummy.tmpl") writeLines("foo", fn) check(makeClusterFunctionsInteractive()) check(makeClusterFunctionsSGE(template = fn)) check(makeClusterFunctionsTORQUE(template = fn)) check(makeClusterFunctionsSlurm(template = fn)) check(makeClusterFunctionsOpenLava(template = fn)) check(makeClusterFunctionsLSF(template = fn)) check(makeClusterFunctionsTORQUE("torque-lido")) check(makeClusterFunctionsSlurm("slurm-dortmund")) check(makeClusterFunctionsDocker("image")) expect_error(makeClusterFunctionsLSF(), "point to a readable template file") skip_on_os(c("windows", "solaris")) # system2 is broken on solaris check(makeClusterFunctionsSSH(workers = list(Worker$new(nodename = "localhost", ncpus = 1L)))) }) test_that("submitJobResult", { x = makeSubmitJobResult(0, 99) expect_is(x, "SubmitJobResult") expect_identical(x$status, 0L) expect_identical(x$batch.id, 99) expect_identical(x$msg, "OK") x = makeSubmitJobResult(1, 99) expect_is(x, "SubmitJobResult") expect_identical(x$msg, "TEMPERROR") x = makeSubmitJobResult(101, 99) expect_is(x, "SubmitJobResult") expect_identical(x$msg, "ERROR") expect_output(print(x), "submission result") x = cfHandleUnknownSubmitError(cmd = "ls", exit.code = 42L, output = "answer to life") expect_is(x, "SubmitJobResult") expect_true(all(stri_detect_fixed(x$msg, c("ls", "42", "answer to life")))) }) test_that("brew", { fn = fs::file_temp() lines = c("####", " ", "!!!", "foo=<%= job.hash %>") writeLines(lines, fn) res = stri_split_fixed(cfReadBrewTemplate(fn), "\n")[[1]] assertCharacter(res, len = 3) expect_equal(sum(stri_detect_fixed(res, "job.hash")), 1) res = stri_split_fixed(cfReadBrewTemplate(fn, comment.string = "###"), "\n")[[1]] assertCharacter(res, len = 2) expect_equal(sum(stri_detect_fixed(res, "job.hash")), 1) reg = makeTestRegistry() ids = batchMap(identity, 1:2, reg = reg) jc = makeJobCollection(1, reg = reg) text = 
cfReadBrewTemplate(fn, comment.string = "###")
  # brewing substitutes the job hash of the collection into the template
  fn = cfBrewTemplate(text = text, jc = jc, reg = reg)
  brewed = readLines(fn)
  expect_equal(brewed[1], "!!!")
  expect_equal(brewed[2], sprintf("foo=%s", jc$job.hash))
  fs::file_delete(fn)
})

# A registry whose file.dir contains '#', '_', '-' and a space must still be
# able to run jobs and return their results.
test_that("Special chars in directory names", {
  reg = makeTestRegistry()
  base.dir = fs::file_temp(pattern = "test", tmp_dir = fs::path_dir(reg$file.dir))
  fs::dir_create(base.dir)
  file.dir = fs::path(base.dir, "test#some_frequently-used chars")
  reg = makeTestRegistry(file.dir = file.dir)
  batchMap(identity, 1:2, reg = reg)
  submitAndWait(reg = reg)
  Sys.sleep(0.2)
  expect_equal(reduceResultsList(reg = reg), list(1L, 2L))
  expect_equal(testJob(1, external = FALSE, reg = reg), 1L)
})

# The job reads DEBUGME via Sys.getenv(); the value set locally before
# submission is expected to show up in the job result. Skipped for the
# "Socket" cluster functions.
test_that("Export of environment variable DEBUGME", {
  reg = makeTestRegistry()
  if (reg$cluster.functions$name == "Socket")
    skip("Environment variables not exported for CF socket")
  batchMap(function(i) Sys.getenv("DEBUGME"), i = 1, reg = reg)
  withr::local_envvar(c("DEBUGME" = "grepme"))
  submitAndWait(reg, 1)
  res = loadResult(1, reg = reg)
  expect_string(res, min.chars = 1, fixed = "grepme")
})

# findTemplateFile("slurm") must resolve to "batchtools.slurm.tmpl" located in
# the directory given by R_BATCHTOOLS_SEARCH_PATH.
test_that("findTemplateFile", {
  d = fs::path_temp()
  fn = fs::path(d, "batchtools.slurm.tmpl")
  fs::file_create(fn)
  withr::with_envvar(list(R_BATCHTOOLS_SEARCH_PATH = d),
    expect_equal(findTemplateFile("slurm"), fs::path_abs(fn))
  )
  fs::file_delete(fn)
})

================================================
FILE: tests/testthat/test_ClusterFunctionsMulticore.R
================================================
# Two sleeping jobs on makeClusterFunctionsMulticore(2): both must be reported
# by findOnSystem() right after submission and by findDone() after waiting.
test_that("cf multicore", {
  skip_on_os("windows")
  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsMulticore(2)
  ids = batchMap(Sys.sleep, time = c(2, 2), reg = reg)
  silent({
    submitJobs(1:2, reg = reg)
    expect_equal(findOnSystem(reg = reg), findJobs(reg = reg))
    expect_true(waitForJobs(sleep = 0.2, expire.after = 1, reg = reg))
  })
  expect_data_table(findOnSystem(reg = reg), nrow = 0)
  expect_equal(findDone(reg = reg), findJobs(reg = reg))

  # check that max.concurrent.jobs works
  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsMulticore(2)
  reg$max.concurrent.jobs = 1
  ids = batchMap(Sys.sleep, time = c(2, 0), reg = reg)
  submitAndWait(1:2, reg = reg)
  tab = getJobStatus(reg = reg)
  # with one concurrent job the start times must be more than 1 apart
  expect_true(diff(tab$started) > 1)
})

# Disabled block (never executed because of `if (FALSE)`).
if (FALSE) {
  # Multicore cleans up finished processes
  reg = makeTestRegistry()
  batchMap(Sys.sleep, rep(0.8, 8), reg = reg)
  parallel::mccollect()
  p = self = Multicore$new(4)
  for (i in 1:4) {
    p$spawn(makeJobCollection(i, reg = reg))
  }
  expect_data_table(p$jobs, ncol = 2)
  expect_integer(p$jobs$pid, len = 4L, any.missing = FALSE, lower = 0L)
  expect_integer(p$jobs$count, len = 4L, any.missing = FALSE, lower = 0L, upper = 1L)
  Sys.sleep(1.5)
  p$spawn(makeJobCollection(5L, reg = reg))
  expect_integer(p$jobs$pid, len = 1L, any.missing = FALSE, lower = 0L)
  p$collect(3)
  p$collect(1)
  x = parallel::mccollect()
  expect_true(is.null(x))
}

================================================
FILE: tests/testthat/test_ClusterFunctionsSSH.R
================================================
# SSH cluster functions against a localhost worker; skipped on Windows, CI and
# CRAN. The body only runs when the test registry uses the "Interactive"
# cluster functions. Job 2 is killed, job 1 is expected to finish.
test_that("cf ssh", {
  skip_on_os("windows")
  skip_on_ci()
  skip_on_cran()
  reg = makeTestRegistry()
  if (reg$cluster.functions$name == "Interactive") {
    workers = list(Worker$new("localhost", ncpus = 2, max.load = 9999))
    reg$cluster.functions = makeClusterFunctionsSSH(workers)
    saveRegistry(reg)
    fun = function(x) { Sys.sleep(x); is(x, "numeric") }
    ids = batchMap(fun, x = c(5, 5), reg = reg)
    silent({
      submitJobs(1:2, reg = reg)
      Sys.sleep(0.2)
      expect_equal(findOnSystem(reg = reg), findJobs(reg = reg))
      expect_true(killJobs(2, reg = reg)$killed)
      expect_true(
        waitForJobs(1, sleep = 0.5, reg = reg)
      )
    })
    expect_equal(findDone(reg = reg), findJobs(ids = 1, reg = reg))
    expect_equal(findNotDone(reg = reg), findJobs(ids = 2, reg = reg))
    expect_true(loadResult(1, reg = reg))
  }
})

# Disabled block (never executed because of `if (FALSE)`).
if (FALSE) {
  reg = makeTestRegistry()
  workers = list(Worker$new("129.217.207.53"), Worker$new("localhost", ncpus = 1))
  reg$cluster.functions =
makeClusterFunctionsSSH(workers)
  expect_string(workers[[1L]]$script)
  expect_string(workers[[2L]]$script)
  expect_equal(workers[[1L]]$ncpus, 4L)
  expect_equal(workers[[2L]]$ncpus, 1L)
  fun = function(x) { Sys.sleep(x); is(x, "numeric") }
  ids = batchMap(fun, x = 20 * c(1, 1), reg = reg)
  submitJobs(1:2, reg = reg)
  expect_equal(findOnSystem(reg = reg), findJobs(reg = reg))
  expect_true(killJobs(2, reg = reg)$killed)
  expect_true(waitForJobs(1, reg = reg, sleep = 1))
  expect_equal(findDone(reg = reg), findJobs(ids = 1, reg = reg))
  expect_equal(findNotDone(reg = reg), findJobs(ids = 2, reg = reg))
  expect_true(loadResult(1, reg = reg))
}

================================================
FILE: tests/testthat/test_ClusterFunctionsSocket.R
================================================
# Two sleeping jobs on makeClusterFunctionsSocket(2): both must be reported by
# findOnSystem() right after submission and by findDone() after waiting.
# Requires the "snow" package.
test_that("cf socket", {
  skip_if_not_installed("snow")
  # skip_on_ci()
  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsSocket(2)
  ids = batchMap(Sys.sleep, time = c(5, 5), reg = reg)
  silent({
    submitJobs(1:2, reg = reg)
    expect_equal(findOnSystem(reg = reg), findJobs(reg = reg))
    expect_true(waitForJobs(sleep = 0.5, reg = reg))
  })
  expect_data_table(findOnSystem(reg = reg), nrow = 0)
  expect_equal(findDone(reg = reg), findJobs(reg = reg))
})

================================================
FILE: tests/testthat/test_ExperimentRegistry.R
================================================
# Structural checks on a freshly created ExperimentRegistry: class, directory
# layout, registry file and the basic fields.
test_that("makeTestExperimentRegistry", {
  reg = makeTestExperimentRegistry()
  expect_is(reg, "Registry")
  expect_is(reg, "ExperimentRegistry")
  expect_true(is.environment(reg))
  expect_directory_exists(reg$file.dir, access = "rw")
  expect_directory_exists(reg$work.dir, access = "r")
  expect_directory_exists(fs::path(reg$file.dir, c("jobs", "results", "updates", "logs")))
  # expect_file_exists() instead of the alias expect_file(), matching the
  # equivalent assertion in the "makeRegistry" test
  expect_file_exists(fs::path(reg$file.dir, "registry.rds"))
  expect_character(reg$packages, any.missing = FALSE)
  expect_character(reg$namespaces, any.missing = FALSE)
  expect_int(reg$seed, na.ok = FALSE)
  expect_true(reg$writeable)
expect_is(reg$cluster.functions, "ClusterFunctions")
  expect_list(reg$default.resources, names = "strict")
  checkTables(reg, any.missing = FALSE, nrows = 0L)
  expect_character(reg$problems, len = 0L)
  expect_character(reg$algorithms, len = 0L)
  expect_output(print(reg), "Experiment Registry")
})

# The printed registry must report the number of problems and algorithms,
# before (0) and after (1) adding one of each.
test_that("Printer works (#170)", {
  reg = makeTestExperimentRegistry()
  expect_character(reg$problems, len = 0L)
  expect_character(reg$algorithms, len = 0L)
  expect_output(print(reg), "Problems[[:space:]]*:[[:space:]]*0")
  expect_output(print(reg), "Algorithms[[:space:]]*:[[:space:]]*0")
  addProblem("iris", data = iris, reg = reg)
  addAlgorithm("foo", fun = function(...) list(...), reg = reg)
  expect_character(reg$problems, len = 1L, any.missing = FALSE)
  expect_character(reg$algorithms, len = 1L, any.missing = FALSE)
  expect_output(print(reg), "Problems[[:space:]]*:[[:space:]]*1")
  expect_output(print(reg), "Algorithms[[:space:]]*:[[:space:]]*1")
})

================================================
FILE: tests/testthat/test_Job.R
================================================
# A Job built via makeJob() and one fetched from a JobCollection via getJob()
# must expose the same id, parameters (including more.args), seed, resources
# and function.
test_that("Job", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, i = 1:3, reg = reg, more.args = list(x = 1))
  submitAndWait(reg, 1, resources = list(foo = "bar"))
  job = makeJob(reg = reg, i = 1)
  expect_is(job, "Job")
  expect_identical(job$id, 1L)
  expect_equal(job$pars, list(i = 1L, x = 1))
  expect_count(job$seed)
  expect_list(job$resources, names = "named")
  expect_equal(job$resources$foo, "bar")
  expect_function(job$fun)
  jc = makeJobCollection(reg = reg, resources = list(foo = "bar"))
  job = getJob(jc, i = 1L)
  expect_is(job, "Job")
  expect_identical(job$id, 1L)
  expect_equal(job$pars, list(i = 1L, x = 1))
  expect_count(job$seed)
  expect_list(job$resources, names = "named")
  expect_equal(job$resources$foo, "bar")
  expect_function(job$fun)
})

# Same checks for the Experiment flavour of a job (problem + algorithm).
test_that("Experiment", {
  reg = makeTestExperimentRegistry()
  addProblem(reg = reg, "p1", fun = function(job, data, ...)
list(data = data, ...))
  addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) length(instance))
  ids = addExperiments(list(p1 = data.table(i = 1:3)), list(a1 = data.table()), reg = reg)
  # experiment created directly from the registry
  job = makeJob(1, reg = reg)
  expect_is(job, "Experiment")
  expect_identical(job$id, 1L)
  expect_equal(job$pars, list(prob.pars = list(i = 1), algo.pars = list()))
  expect_count(job$repl)
  expect_count(job$seed)
  expect_list(job$resources, names = "named")
  expect_is(job$problem, "Problem")
  expect_is(job$algorithm, "Algorithm")
  expect_identical(job$instance, list(data = NULL, i = 1L))
  # same experiment fetched from a job collection
  jc = makeJobCollection(reg = reg)
  job = getJob(jc, i = 1L)
  expect_is(job, "Experiment")
  expect_identical(job$id, 1L)
  expect_equal(job$pars, list(prob.pars = list(i = 1), algo.pars = list()))
  expect_count(job$seed)
  expect_list(job$resources, names = "named")
  expect_is(job$problem, "Problem")
  expect_is(job$algorithm, "Algorithm")
  expect_identical(job$instance, list(data = NULL, i = 1L))
})

# Jobs return their external.dir; the directory is expected to exist after the
# job ran, for plain registries and for experiment registries.
test_that("External directory is created", {
  reg = makeTestRegistry()
  fun = function(..., .job) .job$external.dir
  ids = batchMap(fun, i = 1:3, reg = reg, more.args = list(x = 1))
  submitAndWait(reg)
  expect_directory_exists(unwrap(reduceResultsDataTable(1:3, reg = reg))[[2]])
  reg = makeTestExperimentRegistry()
  addProblem(reg = reg, "p1", fun = function(job, data, ...) list(data = data, ...))
  addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...)
{ saveRDS(job$id, file = fs::path(job$external.dir, sprintf("%s.rds", job$id)), version = 2L)
    job$external.dir
  })
  ids = addExperiments(list(p1 = data.table(i = 1:3)), list(a1 = data.table()), reg = reg)
  # only jobs 1 and 3 are run; job 2 must have neither a result nor a directory
  submitAndWait(reg, c(1, 3))
  paths = reduceResultsList(1:3, missing.val = NULL, reg = reg)
  expect_directory_exists(paths[[1]])
  expect_true(fs::file_exists(fs::path(reg$file.dir, "external", "1", "1.rds")))
  expect_null(paths[[2]])
  expect_false(fs::dir_exists(fs::path(reg$file.dir, "external", "2")))
  expect_directory_exists(paths[[3]])
  expect_true(fs::file_exists(fs::path(reg$file.dir, "external", "3", "3.rds")))
  expect_equal(reduceResultsList(1:3, fun = function(job, ...) job$external.dir, reg = reg, missing.val = NULL), paths)
  # resetting job 3 is expected to remove its external directory only
  resetJobs(3, reg = reg)
  expect_false(fs::dir_exists(fs::path(reg$file.dir, "external", "3")))
  expect_true(fs::dir_exists(fs::path(reg$file.dir, "external", "1")))

  # directory is persistent between submits?
  addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) {
    list.files(job$external.dir)
  })
  submitAndWait(reg, 1)
  sweepRegistry(reg = reg)
  expect_true(fs::file_exists(fs::path(reg$file.dir, "external", "1", "1.rds")))
  expect_identical(loadResult(1, reg = reg), "1.rds")
})

================================================
FILE: tests/testthat/test_JobCollection.R
================================================
# Checks the fields of a JobCollection created from a plain registry.
test_that("makeJobCollection", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
ids = batchMap(fun, i = 1:3, reg = reg, more.args = list(x = 1))
  jc = makeJobCollection(ids, resources = list(foo = 42), reg = reg)
  expect_environment(jc, c("file.dir", "job.hash", "jobs", "log.file", "packages", "resources", "uri", "work.dir"))
  expect_directory_exists(jc$file.dir)
  expect_string(jc$job.hash, pattern = "^job[[:alnum:]]{32}$")
  expect_data_table(jc$jobs, key = "job.id")
  expect_string(jc$log.file)
  expect_character(jc$packages, any.missing = FALSE)
  expect_list(jc$resources, names = "unique")
  expect_string(jc$uri)
  expect_directory_exists(jc$work.dir)
  expect_list(jc$jobs$job.pars)
  expect_string(jc$array.var, na.ok = TRUE)
  expect_flag(jc$array.jobs)
  expect_output(print(jc), "Collection")
})

# Paths starting with "~" in the registry must be carried over to the job
# collection unexpanded (file.dir, uri, log.file and work.dir).
test_that("makeJobCollection does not expand relative paths", {
  skip_on_os("windows")
  reg = makeTestRegistry(file.dir = NA, make.default = FALSE)
  batchMap(identity, 1, reg = reg)
  reg$file.dir = fs::path_abs("~/foo")
  reg$work.dir = fs::path_abs("~/bar")
  expect_string(reg$file.dir, pattern = "^~")
  expect_string(reg$work.dir, pattern = "^~")
  jc = makeJobCollection(1, reg = reg)
  expect_true(stri_startswith_fixed(jc$file.dir, "~/foo"))
  expect_true(stri_startswith_fixed(jc$uri, "~/foo/jobs/"))
  expect_true(stri_startswith_fixed(jc$log.file, "~/foo/logs"))
  expect_true(stri_startswith_fixed(jc$work.dir, "~/bar"))
})

# Checks the fields of a JobCollection created from an experiment registry.
test_that("makeJobCollection.ExperimentCollection", {
  reg = makeTestExperimentRegistry()
  addProblem(reg = reg, "p1", fun = function(job, data, ...) list(data = data, ...))
  addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...)
length(instance))
  ids = addExperiments(list(p1 = data.table(i = 1:3)), list(a1 = data.table()), reg = reg)
  jc = makeJobCollection(ids, resources = list(foo = 42), reg = reg)
  expect_directory_exists(jc$file.dir)
  expect_string(jc$job.hash, pattern = "^job[[:alnum:]]{32}$")
  expect_data_table(jc$jobs, key = "job.id")
  expect_string(jc$log.file)
  expect_character(jc$packages, any.missing = FALSE)
  expect_list(jc$resources, names = "unique")
  expect_string(jc$uri)
  expect_directory_exists(jc$work.dir)
  # experiment collections carry problem/algorithm columns in the jobs table
  expect_list(jc$jobs$prob.pars)
  expect_list(jc$jobs$algo.pars)
  expect_character(jc$jobs$problem)
  expect_character(jc$jobs$algorithm)
  expect_string(jc$array.var, na.ok = TRUE)
  expect_flag(jc$array.jobs)
  expect_is(jc, "ExperimentCollection")
})

# The resource chunks.as.arrayjobs = TRUE must be reflected in jc$array.jobs.
test_that("chunks.as.arrayjobs is stored", {
  reg = makeTestRegistry(file.dir = NA, make.default = FALSE)
  ids = batchMap(identity, 1:2, reg = reg)
  resources = list(chunks.as.arrayjobs = TRUE)
  jc = makeJobCollection(ids, resources = resources, reg = reg)
  expect_true(jc$array.jobs)
})

================================================
FILE: tests/testthat/test_JobNames.R
================================================
# Setting, reading and clearing job names, and how they appear in a
# JobCollection.
test_that("setJobNames", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
ids = batchMap(fun, i = 1:3, reg = reg)
  expect_null(setJobNames(ids, letters[1:3], reg = reg))
  x = getJobNames(reg = reg)
  expect_data_table(x, ncol = 2, nrow = 3, key = "job.id")
  expect_identical(x$job.name, letters[1:3])
  expect_identical(reg$status$job.name, letters[1:3])
  expect_data_table(ijoin(getJobNames(1:2, reg = reg), getJobPars(reg = reg)), ncol = 3, nrow = 2, key = "job.id")
  # the collection name is the name of its first job
  jc = makeJobCollection(1, reg = reg)
  expect_identical(jc$job.name, "a")
  jc = makeJobCollection(1:3, reg = reg)
  expect_identical(jc$job.name, "a")
  expect_identical(jc$jobs$job.name, letters[1:3])
  # after resetting all names to NA the collection name equals the job hash
  expect_null(setJobNames(ids, rep(NA_character_, 3), reg = reg))
  x = getJobNames(reg = reg)
  expect_data_table(x, ncol = 2, nrow = 3, key = "job.id")
  expect_identical(x$job.name, rep(NA_character_, 3))
  jc = makeJobCollection(1:3, reg = reg)
  expect_identical(jc$job.name, jc$job.hash)
})

================================================
FILE: tests/testthat/test_Problem.R
================================================
# Adding problems (with and without seed/cache) and removing them again,
# including the effect on files, cache directories and registered experiments.
test_that("addProblem / removeProblem", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data, ...) nrow(data))
  expect_is(prob, "Problem")
  expect_equal(prob$data, iris)
  expect_equal(prob$name, "p1")
  expect_function(prob$fun)
  expect_null(prob$seed)
  expect_file_exists(getProblemURI(reg, prob$name))
  expect_false(prob$cache)
  expect_false(fs::dir_exists(getProblemCacheDir(reg, "p1")))
  prob = addProblem(reg = reg, "p2", fun = function(...) NULL, seed = 42, cache = TRUE)
  expect_is(prob, "Problem")
  # was expect_null(prob$data, NULL): the stray second argument landed in the
  # `info` slot and carried no meaning
  expect_null(prob$data)
  expect_equal(prob$name, "p2")
  expect_function(prob$fun)
  expect_identical(prob$seed, 42L)
  expect_file_exists(getProblemURI(reg, prob$name))
  expect_true(prob$cache)
  expect_directory_exists(getProblemCacheDir(reg, "p2"))
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...)
NULL)
  prob.designs = list(p1 = data.table(), p2 = data.table())
  algo.designs = list(a1 = data.table())
  ids = addExperiments(prob.designs, algo.designs, repls = 2, reg = reg)
  expect_integer(ids$job.id, len = 4L)
  # removing p1 must drop its two experiments and its file, leaving p2 intact
  removeProblems(reg = reg, "p1")
  expect_directory_exists(getProblemCacheDir(reg, "p2"))
  expect_integer(reg$status$job.id, len = 2L)
  expect_set_equal("p2", reg$problems)
  expect_false(fs::file_exists(getProblemURI(reg, "p1")))
  expect_true(fs::file_exists(getProblemURI(reg, "p2")))
  expect_set_equal(getJobPars(reg = reg)$problem, "p2")
  checkTables(reg)
  # removing p2 must also remove its cache directory
  removeProblems(reg = reg, "p2")
  expect_false(fs::dir_exists(getProblemCacheDir(reg, "p2")))
})

# With cache = TRUE the (random) problem instance is expected to be shared:
# results must have zero variance within each (param, problem, repl) group.
test_that("instance caching", {
  reg = makeTestExperimentRegistry()
  addProblem(reg = reg, "p1", data = iris, fun = function(job, data, param) param * 10 + runif(1), seed = 1, cache = TRUE)
  addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) list(result = instance))
  addAlgorithm(reg = reg, "a2", fun = function(job, data, instance, ...)
list(result = instance))
  ids = addExperiments(prob.designs = list(p1 = data.table(param = 1:2)), repls = 2, reg = reg)
  j = makeJob(1, reg = reg)
  # accessing the instance is expected to create the cache file checked below
  foo = j$instance
  expect_file_exists(getProblemCacheURI(j))
  submitAndWait(reg = reg)
  tab = unwrap(ljoin(getJobTable(reg = reg)[, c("job.id", "repl", "problem", "prob.pars", "algorithm")], reduceResultsDataTable(reg = reg)))
  expect_equal(tab[, list(v = var(result)), by = c("param", "problem", "repl")]$v, rep(0, 4))
})

================================================
FILE: tests/testthat/test_Registry.R
================================================
# Structural checks on a freshly created Registry, plus the `packages` and
# `seed` arguments.
test_that("makeRegistry", {
  reg = makeTestRegistry()
  expect_is(reg, "Registry")
  expect_true(is.environment(reg))
  expect_directory_exists(reg$file.dir, access = "rw")
  expect_directory_exists(reg$work.dir, access = "r")
  expect_directory_exists(fs::path(reg$file.dir, c("jobs", "results", "updates", "logs")))
  expect_file_exists(fs::path(reg$file.dir, "registry.rds"))
  expect_character(reg$packages, any.missing = FALSE)
  expect_character(reg$namespaces, any.missing = FALSE)
  expect_int(reg$seed, na.ok = FALSE)
  expect_true(reg$writeable)
  expect_is(reg$cluster.functions, "ClusterFunctions")
  expect_list(reg$default.resources, names = "strict")
  checkTables(reg, any.missing = FALSE, nrows = 0L)
  reg = makeTestRegistry(packages = "checkmate", seed = 123)
  expect_equal(reg$packages, "checkmate")
  expect_int(reg$seed)
  expect_identical(reg$seed, 123L)
  expect_output(print(reg), "Registry")
})

# Settings written to a configuration file must end up in the registry.
test_that("reading conf file", {
  fn = fs::file_temp("conf")
  writeLines(con = fn, "default.resources = list(walltime = 42)")
  reg = makeTestRegistry(conf.file = fn)
  expect_identical(reg$default.resources, list(walltime = 42))
  fs::file_delete(fn)
})

# Only a registry created with make.default = TRUE may become the default
# registry. The previous default is saved first and restored at the end.
test_that("make.default does work", {
  prev = batchtools$default.registry
  setDefaultRegistry(NULL)
  expect_error(getDefaultRegistry(), "No default")
  reg = makeTestRegistry(make.default = TRUE, seed = 123)
  expect_equal(reg$seed, 123L)
  reg = makeTestRegistry(make.default =
FALSE, seed = 124)
  expect_equal(reg$seed, 124L)
  expect_class(getDefaultRegistry(), "Registry")
  # the default registry is still the one created with seed 123
  expect_equal(getDefaultRegistry()$seed, 123L)
  batchtools$default.registry = prev
})

# Files given via `source` and `load` must be evaluated into the global
# environment, both for absolute paths and for paths relative to work.dir.
test_that("extra files are loaded", {
  wd = fs::file_temp()
  fs::dir_create(fs::path(wd, "subdir"))

  # define some files to source/load
  fn = list(source = fs::path(wd, "src_file.r"), load = fs::path(wd, "subdir", "load_file.RData"))
  writeLines("x_from_source = 123", con = fn$source)
  x_from_load = 321
  save(x_from_load, file = fn$load)
  rm(x_from_load)
  reg = makeTestRegistry(work.dir = wd, source = fn$source, load = fn$load)
  expect_identical(get("x_from_source", .GlobalEnv), 123)
  expect_identical(get("x_from_load", .GlobalEnv), 321)
  rm("x_from_source", envir = .GlobalEnv)
  rm("x_from_load", envir = .GlobalEnv)
  reg = makeTestRegistry(work.dir = wd, source = fs::path_file(fn$source), load = fs::path("subdir", fs::path_file(fn$load)))
  expect_identical(get("x_from_source", .GlobalEnv), 123)
  expect_identical(get("x_from_load", .GlobalEnv), 321)
  rm("x_from_source", envir = .GlobalEnv)
  rm("x_from_load", envir = .GlobalEnv)
  fs::dir_delete(wd)
})

# A registry loaded from disk must equal the in-memory one in every field
# except "hash"; the listed session-specific fields must not be stored in
# registry.rds.
test_that("loadRegistry", {
  regs = list(
    makeTestRegistry(),
    makeTestExperimentRegistry()
  )
  for (reg1 in regs) {
    fd = reg1$file.dir
    setDefaultRegistry(NULL)
    reg2 = loadRegistry(fd, make.default = FALSE, writeable = TRUE)
    checkTables(reg1)
    checkTables(reg2)
    nms = union(ls(reg1, all.names = TRUE), ls(reg2, all.names = TRUE))
    nms = chsetdiff(nms, "hash")
    for (nm in nms)
      expect_equal(reg1[[nm]], reg2[[nm]], info = nm)
    x = readRDS(fs::path(fd, "registry.rds"))
    expect_null(x$cluster.functions)
    expect_null(x$default.resources)
    expect_null(x$temp.dir)
    expect_null(x$mtime)
    expect_null(x$writeable)
  }
})

# A non-existing `source` file must only warn when creating/loading the
# registry; the error is expected when a job is actually executed.
test_that("loadRegistry with missing dependencies is still usable (#122)", {
  expect_warning(reg <- makeTestRegistry(source = fs::file_temp()), "Failed to source")
  saveRegistry(reg)
  expect_warning(loadRegistry(reg$file.dir, writeable = TRUE), "Failed to source")
batchMap(identity, 1, reg = reg)
  expect_error(testJob(1, external = FALSE, reg = reg), "Failed to source file")
})

# A job collection that requires a missing package fails early; the failure
# must still produce one update file that syncRegistry() can read, so that the
# error message is available afterwards.
test_that("loadRegistry after early node error still usable (#135)", {
  reg = makeTestRegistry()
  batchMap(identity, 1:2, reg = reg)
  jc = makeJobCollection(1, reg = reg)
  jc$packages = "not_existing_package"
  suppressAll(doJobCollection(jc))
  expect_character(list.files(fs::path(reg$file.dir, "updates")), len = 1L)
  expect_true(syncRegistry(reg = reg))
  expect_string(getErrorMessages(reg = reg)$message, fixed = "not_existing_package")
})

# An empty (hence unreadable) .rds file in the updates directory must be
# skipped with a message. The test description previously ended in a stray ")".
test_that("syncRegistry skips broken update files", {
  reg = makeTestRegistry()
  p = dir(reg, "updates")
  fs::file_create(fs::path(p, "foo.rds"))
  # NOTE(review): the return value is unused; looks like leftover debugging —
  # confirm before removing
  fs::dir_ls(p)
  expect_message(syncRegistry(reg = reg), "Skipping")
})

# clearRegistry() must empty all tables and the jobs/logs/results/updates
# directories, restart job ids at 1 and keep user-defined fields (reg$foo).
test_that("clearRegistry", {
  reg = makeTestRegistry()
  reg$foo = TRUE
  ids = batchMap(identity, 1:3, reg = reg)
  addJobTags(1:2, "bar", reg = reg)
  ids[, chunk := chunk(job.id, n.chunks = 2)]
  submitAndWait(reg, ids)
  clearRegistry(reg)
  checkTables(reg, nrow = 0L)
  expect_identical(list.files(dir(reg, "jobs")), character(0))
  expect_identical(list.files(dir(reg, "logs")), character(0))
  expect_identical(list.files(dir(reg, "results")), character(0))
  expect_identical(list.files(dir(reg, "updates")), character(0))
  expect_false(fs::file_exists(fs::path(reg$file.dir, "user.function.rds")))
  expect_identical(batchMap(identity, 1:4, reg = reg), data.table(job.id = 1:4, key = "job.id"))
  expect_true(reg$foo)
})

# A registry loaded with writeable = FALSE must allow queries but reject every
# modifying operation with an error mentioning "writeable".
test_that("read only mode", {
  f = function(x) if (x == 3) stop(3) else x
  reg = makeTestRegistry()
  batchMap(f, 1:4, reg = reg)
  submitAndWait(ids = 1:3, reg)

  # simulate that job 4 has been started but is not terminated yet
  jc = makeJobCollection(4L, reg = reg)
  suppressAll({doJobCollection(jc, jc$log.file)})
  reg$status[job.id == 4L, job.hash := jc$job.hash]
  saveRegistry(reg = reg)
  reg = loadRegistry(reg$file.dir, writeable = FALSE)

  # query status
  expect_class(getStatus(reg = reg), "Status")
  expect_data_table(findDone(reg = reg),
nrow = 3)
  expect_data_table(findErrors(reg = reg), nrow = 1)
  # the updates directory is expected to still hold exactly one file
  expect_character(fs::dir_ls(fs::path(reg$file.dir, "updates")), len = 1L)

  # load results
  expect_identical(loadResult(1L, reg = reg), 1L)
  expect_identical(reduceResultsList(reg = reg), as.list(c(1:2, 4L)))
  expect_character(fs::dir_ls(fs::path(reg$file.dir, "updates")), len = 1L)

  # inspect errors
  expect_data_table(getErrorMessages(reg = reg), nrow = 1)
  expect_character(getLog(3L, reg = reg))
  expect_character(getLog(4L, reg = reg))

  # try to write
  expect_error(sweepRegistry(reg = reg), "writeable")
  expect_error(setJobNames(ids = 1L, reg = reg), "writeable")
  expect_error(addJobTags(ids = 1L, "a", reg = reg), "writeable")
  expect_error(resetJobs(reg = reg), "writeable")
  expect_error(clearRegistry(reg = reg), "writeable")
  expect_error(removeRegistry(reg = reg), "writeable")
  expect_error(killJobs(reg = reg), "writeable")
  expect_directory_exists(reg$file.dir)
  expect_character(fs::dir_ls(fs::path(reg$file.dir, "updates")), len = 1L)

  # same stuff for ExperimentRegistry
  reg = makeTestExperimentRegistry()
  addProblem("foo", data = 1, reg = reg)
  addAlgorithm("bar", function(data, instance, ...) instance, reg = reg)
  addExperiments(reg = reg)
  # here read-only mode is switched on directly via the field
  reg$writeable = FALSE
  expect_data_table(summarizeExperiments(reg = reg), nrow = 1L)
  expect_data_table(findExperiments(reg = reg))
  expect_error(addProblem("foo2", iris, reg = reg), "writeable")
  expect_error(removeProblems("foo2", reg = reg), "writeable")
  expect_error(addAlgorithm("bar2", function(data, instance, ...)
instance, reg = reg), "writeable")
  expect_error(removeAlgorithms("bar2", reg = reg), "writeable")
  expect_error(addExperiments(reg = reg), "writeable")
  expect_error(removeExperiments(1, reg = reg), "writeable")
})

# With compress = "xz" in the configuration file, both the registry file and
# the result files must be detected as xz-compressed connections.
test_that("xz compression", {
  fn = fs::file_temp("conf")
  writeLines(con = fn, "compress = \"xz\"")
  reg = makeTestRegistry(conf.file = fn)
  expect_identical(reg$compress, "xz")
  fd = file(dir(reg, "registry.rds"), "r")
  expect_identical(summary(fd)$class, "xzfile")
  close(fd)
  batchMap(identity, 1:3, reg = reg)
  submitAndWait(reg)
  fd = file(getResultFiles(reg, 1), "r")
  expect_identical(summary(fd)$class, "xzfile")
  close(fd)
})

================================================
FILE: tests/testthat/test_addExperiments.R
================================================
# Design columns must reach the problem/algorithm functions with their types
# intact, including list columns; re-adding identical designs must create no
# new jobs, and raising `repls` only the additional replications.
test_that("addExperiments handles parameters correctly", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data, x, y, ...) stopifnot(is.numeric(x) && is.character(y)), seed = 42)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, a, b, ...)
{ print(str(a)); checkmate::assertList(a, len = 1, names = "named"); checkmate::assertDataFrame(b); } )
  prob.designs = list(p1 = data.table(x = 1:2, y = letters[1:2]))
  algo.designs = list(a1 = data.table(a = list(list(x = 1)), b = list(iris)))
  repls = 2
  ids = addExperiments(prob.designs, algo.designs, repls = repls, reg = reg)
  expect_data_table(ids, nrow = 4, key = "job.id")
  # identical designs again: nothing new
  ids = addExperiments(prob.designs, algo.designs, repls = repls, reg = reg)
  expect_data_table(ids, nrow = 0, key = "job.id")
  # one more replication: one new job per problem design row
  ids = addExperiments(prob.designs, algo.designs, repls = repls + 1L, reg = reg)
  expect_data_table(ids, nrow = 2, key = "job.id")
  submitAndWait(reg, ids)
  expect_true(nrow(findErrors(reg = reg)) == 0)
})

# Without explicit designs every problem is combined with every algorithm.
test_that("addExperiments creates default designs", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris)
  prob = addProblem(reg = reg, "p2", data = cars)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance) nrow(data))
  algo = addAlgorithm(reg = reg, "a2", fun = function(job, data, instance) ncol(data))
  ids = addExperiments(reg = reg)
  expect_equal(findExperiments(reg = reg)$job.id, 1:4)
  expect_equal(as.character(reg$defs$problem), rep(c("p1", "p2"), each = 2))
  expect_equal(as.character(reg$defs$algorithm), rep(c("a1", "a2"), times = 2))
})

# User-supplied designs with combine = "bind" and combine = "crossprod",
# reserved parameter names, and (on R < 4.0.0) the stringsAsFactors warning.
test_that("addExperiments / user provided designs", {
  reg = makeTestExperimentRegistry()
  addProblem(reg = reg, "p1", data = iris, fun = function(...) list(...))
  addProblem(reg = reg, "p2", data = cars, fun = function(...) list(...))
  addAlgorithm(reg = reg, "a1", fun = function(...) list(...))
  addAlgorithm(reg = reg, "a2", fun = function(...)
ncol(data))
  prob.designs = list(p1 = data.table(a = 1, b = 2:4))
  algo.designs = list(a1 = data.table(c = 3:8), a2 = data.table())
  ids = addExperiments(reg = reg, prob.designs = prob.designs, algo.designs = algo.designs, combine = "bind")
  expect_data_table(ids, nrow = 9, key = "job.id")
  # (the unused assignment `tab = getJobPars(reg = reg)` was removed; the same
  # call is made on the next line)
  pars = unwrap(getJobPars(reg = reg))
  expect_set_equal(pars$problem, "p1")
  expect_set_equal(pars$algorithm, c("a1", "a2"))
  expect_equal(pars$a, rep(1L, 9))
  expect_equal(pars$b, rep(2:4, 3))
  expect_equal(pars$c, c(3:8, rep(NA, 3)))
  # "job" and "instance" are rejected as design column names
  expect_error(addExperiments(reg = reg, prob.designs = list(p1 = data.table(job = 2))), "reserved keyword 'job'")
  expect_error(addExperiments(reg = reg, algo.designs = list(a2 = data.table(instance = "foo"))), "reserved keyword 'instance'")
  prob.designs = c(prob.designs, list(p2 = data.table()))
  ids = addExperiments(reg = reg, prob.designs = prob.designs, algo.designs = algo.designs, combine = "bind")
  expect_data_table(ids, nrow = 7, key = "job.id")
  expect_data_table(unwrap(getJobPars(reg = reg)), nrow = 16)
  ids = addExperiments(reg = reg, prob.designs = prob.designs, algo.designs = algo.designs, combine = "crossprod")
  expect_data_table(ids, nrow = 12, key = "job.id")
  expect_data_table(unwrap(getJobPars(reg = reg)), nrow = 28)
  if (getRversion() < "4.0.0") {
    pd = list(p1 = data.frame(foo = letters[1:2]))
    withr::with_options(list(stringsAsFactors = NULL), {
      expect_warning(addExperiments(reg = reg, prob.designs = pd), "stringsAsFactors")
    })
    withr::with_options(list(stringsAsFactors = TRUE), {
      expect_warning(addExperiments(reg = reg, prob.designs = pd), "stringsAsFactors")
    })
    withr::with_options(list(stringsAsFactors = FALSE), {
      addExperiments(reg = reg, prob.designs = pd)
    })
  }
})

# reg = makeTestExperimentRegistry()
# addProblem(reg = reg, "p1", data = iris, fun = function(job, data, ...) nrow(data))
# addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) NULL)
# addAlgorithm(reg = reg, "a2", fun = function(job, data, instance, ...) NULL)
# prob.designs = list(p1 = data.table(x = 1:500))
# algo.designs = list(a1 = data.table(y = 1:50), a2 = data.table(y = 1:20))
# repls = 2
# profvis::profvis(addExperiments(prob.designs, algo.designs = algo.designs, repls = repls, reg = reg))
# ids = findExperiments(reg = reg)
# profvis::profvis(submitJobs(ids = s.chunk(ids), reg = reg))
# profvis::profvis(unwrap(getJobPars(reg = reg)))

================================================
FILE: tests/testthat/test_batchMap.R
================================================
# batchMap() with named arguments, unnamed arguments, `args` as list and as
# data.table, and with `more.args`: checks the returned ids, the files written
# to file.dir and the stored job parameters.
test_that("batchMap", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, i = 1:3, more.args = list(x = 1), reg = reg)
  expect_data_table(ids, any.missing = FALSE, ncols = 1L, nrow = 3L, key = "job.id")
  expect_equal(ids$job.id, 1:3)
  if (getRversion() < "4.1.0")
    expect_equal(readRDS(fs::path(reg$file.dir, "user.function.rds")), fun)
  expect_equal(readRDS(fs::path(reg$file.dir, "more.args.rds")), list(x = 1))
  checkTables(reg)
  expect_data_table(reg$defs, nrow = 3L, any.missing = FALSE)
  expect_data_table(reg$status, nrow = 3L)
  expect_data_table(reg$resources, nrow = 0L)
  expect_equal(reg$defs$job.pars[[1L]], list(i = 1))
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, 1:3, more.args = list(j = 1), reg = reg)
  expect_equal(readRDS(fs::path(reg$file.dir, "more.args.rds")), list(j = 1))
  expect_equal(reg$defs$job.pars, lapply(1:3, list))
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, args = list(1:3), more.args = list(j = 1), reg = reg)
  expect_equal(readRDS(fs::path(reg$file.dir, "more.args.rds")), list(j = 1))
  expect_equal(reg$defs$job.pars, lapply(1:3, list))
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, i = 1:3, j = 1L, reg = reg)
  expect_identical(reg$defs$job.pars[[3L]], list(i = 3L, j = 1L))
  reg = makeTestRegistry()
  fun = function(...) list(...)
cj = CJ(a = 1:3, b = letters[1:3])
  ids = batchMap(fun, args = cj, reg = reg)
  expect_data_table(ids, nrow = 9, key = "job.id")
  expect_equivalent(unwrap(getJobPars(reg = reg))[, c("a", "b")], cj)
})

# more.args may mix named and unnamed elements; both must be stored and passed
# to the function after the mapped argument.
test_that("batchMap with unnamed more.args (#267)", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, 1:3, more.args = list(j = 1L, 5L), reg = reg)
  expect_equal(readRDS(fs::path(reg$file.dir, "more.args.rds")), list(j = 1L, 5L))
  submitAndWait(reg)
  expect_equal(loadResult(1, reg), list(1L, j = 1L, 5L))
})

================================================
FILE: tests/testthat/test_batchReduce.R
================================================
# Sums 1:20 in 10 chunks and then sums the partial results.
test_that("batchReduce", {
  reg = makeTestRegistry()
  xs = 1:20
  ids = batchReduce(function(aggr, x) aggr+x, xs, init = 0, chunks = chunk(seq_along(xs), n.chunks = 10), reg = reg)
  expect_data_table(ids, nrow = 10, key = "job.id")
  submitAndWait(ids = ids, reg = reg)
  y = reduceResults(fun = function(aggr, job, res) aggr+res, init = 0, reg = reg)
  expect_equal(y, sum(1:20))
})

# Same with an extra argument y = 1 and init = 100: each chunk result is
# expected to be 100 + (number of elements) + (sum of elements).
test_that("batchReduce w/ more.args", {
  reg = makeTestRegistry()
  xs = 1:20
  chunks = sort(chunk(seq_along(xs), n.chunks = 10))
  ids = batchReduce(function(aggr, x, y) aggr+x+y, 1:20, init = 100, chunks = chunks, more.args = list(y=1), reg = reg)
  expect_data_table(ids, nrow = 10, key = "job.id")
  submitAndWait(reg = reg)
  expect_equal(as.integer(reduceResultsList(reg = reg)), viapply(split(xs, chunks), function(x) 100L + length(x) + sum(x), use.names = FALSE))
  y = reduceResults(fun=function(aggr, job, res) aggr+res, init = 0, reg = reg)
  expect_equal(y, sum(1:20) + 20 + uniqueN(chunks) * 100)
})

================================================
FILE: tests/testthat/test_btlapply.R
================================================
# btlapply() must return the same as lapply(); n.chunks = 2 is expected to
# produce two distinct job hashes and the resources must be stored.
test_that("btlapply", {
  reg = makeTestRegistry()
  fun = function(x, y) x^y
  res = silent(btlapply(1:3, fun, y = 2, n.chunks = 2, resources = list(..dummy = 42), reg = reg))
  expect_equal(res, lapply(1:3, fun, y = 2))
expect_equal(uniqueN(reg$status$job.hash), 2)
  expect_equal(reg$resources$resources[[1L]]$..dummy, 42)
})

# btmapply() must return the same as mapply() for the corresponding
# SIMPLIFY / USE.NAMES settings.
test_that("btmapply", {
  fun = function(x, y) paste0(x, y)
  x = 1:3
  y = letters[1:3]
  reg = makeTestRegistry()
  res = silent(btmapply(fun, x = x, y = y, chunk.size = 2, use.names = FALSE, reg = reg))
  expect_equal(res, mapply(fun, x = x, y = y, SIMPLIFY = FALSE, USE.NAMES = FALSE))
  expect_equal(uniqueN(reg$status$job.hash), 2)
  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsInteractive()
  expect_equal(silent(btmapply(fun, x = x, y = y, n.chunks = 1, use.names = FALSE, simplify = TRUE, reg = reg)), mapply(fun, x = x, y = y, SIMPLIFY = TRUE, USE.NAMES = FALSE))
  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsInteractive()
  expect_equal(silent(btmapply(fun, x = x, y = y, n.chunks = 1, use.names = TRUE, simplify = TRUE, reg = reg)), mapply(fun, x = x, y = y, SIMPLIFY = TRUE, USE.NAMES = TRUE))
})

================================================
FILE: tests/testthat/test_chunk.R
================================================
# chunk(): the result must be an integer vector as long as the input, with
# chunk numbers in 1..n.chunks, or chunk sizes of at most chunk.size.
test_that("chunk", {
  x = 1:10; n.chunks = 2
  expect_integer(chunk(x, n.chunks = n.chunks), len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE)
  x = 1:10; n.chunks = 1
  expect_integer(chunk(x, n.chunks = n.chunks), len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE)
  x = 1:10; n.chunks = 10
  expect_integer(chunk(x, n.chunks = n.chunks), len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE)
  x = 1:10; n.chunks = 20
  expect_integer(chunk(x, n.chunks = n.chunks), len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE)
  x = integer(0); n.chunks = 20
  expect_integer(chunk(x, n.chunks = n.chunks), len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE)
  x = 1:10; chunk.size = 3
  res = chunk(x, chunk.size = chunk.size)
  expect_integer(res, len = length(x), lower = 1, upper = length(x), any.missing = FALSE)
  expect_integer(table(res), lower = 1, upper = chunk.size,
any.missing = FALSE) x = 1:10; chunk.size = 1 res = chunk(x, chunk.size = chunk.size) expect_integer(res, len = length(x), lower = 1, upper = length(x), any.missing = FALSE) expect_integer(table(res), lower = 1, upper = chunk.size, any.missing = FALSE) expect_equal(chunk(numeric(0), chunk.size = 1), integer(0)) expect_equal(chunk(numeric(0), n.chunks = 1), integer(0)) x = 1:10; n.chunks = 2 res = c(rep(1, 5), rep(2, 5)) expect_equal(chunk(x, n.chunks = n.chunks, shuffle = FALSE), res) }) test_that("binpack", { x = 1:10; chunk.size = 10 res = binpack(x, chunk.size = chunk.size) expect_integer(res, len = length(x), lower = 1, upper = length(x), any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), lower = min(x), upper = chunk.size, any.missing = FALSE) x = 1; chunk.size = 10 res = binpack(x, chunk.size = chunk.size) expect_integer(res, len = length(x), lower = 1, upper = length(x), any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), lower = min(x), upper = chunk.size, any.missing = FALSE) x = rep(1, 100); chunk.size = 1 res = binpack(x, chunk.size = chunk.size) expect_integer(res, len = length(x), lower = 1, upper = length(x), any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), lower = min(x), upper = chunk.size, any.missing = FALSE) x = runif(100); chunk.size = 1 res = binpack(x, chunk.size = chunk.size) expect_integer(res, len = length(x), lower = 1, upper = length(x), any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), lower = min(x), upper = chunk.size, any.missing = FALSE) expect_equal(binpack(numeric(0), 1), integer(0)) }) test_that("lpt", { x = 1:10; n.chunks = 2 res = lpt(x, n.chunks) expect_integer(res, len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), len = min(length(x), n.chunks), lower = min(x), any.missing = FALSE) x = runif(100); n.chunks = 3 res = lpt(x, n.chunks) expect_integer(res, len = length(x), lower = 1, upper = n.chunks, any.missing = 
FALSE) expect_numeric(sapply(split(x, res), sum), len = min(length(x), n.chunks), lower = min(x), any.missing = FALSE) x = 1:10; n.chunks = 1 res = lpt(x, n.chunks) expect_integer(res, len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), len = min(length(x), n.chunks), lower = min(x), any.missing = FALSE) x = 1:10; n.chunks = 12 res = lpt(x, n.chunks) expect_integer(res, len = length(x), lower = 1, upper = n.chunks, any.missing = FALSE) expect_numeric(sapply(split(x, res), sum), len = min(length(x), n.chunks), lower = min(x), any.missing = FALSE) expect_equal(unname(res), 10:1) expect_equal(lpt(numeric(0), 1), integer(0)) }) test_that("caching works", { reg = makeTestExperimentRegistry() p1 = addProblem(reg = reg, "p1", data = iris) p2 = addProblem(reg = reg, "p2", data = data.frame(a = 1:10)) a1 = addAlgorithm(reg = reg, name = "a1", fun = function(data, ...) nrow(data)) a2 = addAlgorithm(reg = reg, name = "a2", fun = function(data, ...) 
2L * nrow(data)) addExperiments(reg = reg) ids = findJobs(reg = reg) ids$chunk = 1L submitAndWait(reg, ids) expect_identical(unlist(reduceResultsList(ids, reg = reg)), as.integer(c(150, 300, 10, 20))) }) ================================================ FILE: tests/testthat/test_convertIds.R ================================================ test_that("convertIds", { reg = makeTestRegistry() batchMap(identity, 1:10, reg = reg) reg$status = reg$status[-3, ] tab = convertIds(reg, NULL) expect_equal(tab, NULL) tab = convertIds(reg, 1:10) expect_data_table(tab, ncol = 1, nrow = 9, key = "job.id") expect_copied(tab, reg$status) tab = convertIds(reg, findJobs(reg = reg)) expect_data_table(tab, ncol = 1, nrow = 9, key = "job.id") expect_copied(tab, reg$status) tab = convertIds(reg, data.table(job.id = 3:4, key = "job.id")) expect_data_table(tab, ncol = 1, nrow = 1, key = "job.id") tab = convertIds(reg, as.data.frame(findJobs(reg = reg))) expect_data_table(tab, ncol = 1, key = "job.id") expect_copied(tab, reg$status) tab = convertIds(reg, 10:8) expect_data_table(tab, ncol = 1, nrow = 3, key = "job.id") expect_equal(tab$job.id, 8:10) expect_copied(tab, reg$status) tab = convertIds(reg, 10:8, keep.order = TRUE) expect_data_table(tab, ncol = 1, nrow = 3) expect_equal(tab$job.id, 10:8) ids = findJobs(reg = reg) ids$chunk = 9:1 tab = convertIds(reg, ids, keep.order = TRUE, keep.extra = "chunk") expect_data_table(tab, ncol = 2, nrow = 9, key = "job.id") # keep index if possible setorderv(ids, "chunk") tab = convertIds(reg, ids, keep.order = TRUE, keep.extra = "chunk") expect_data_table(tab, ncol = 2, nrow = 9) expect_null(key(tab)) expect_equal(tab$job.id, setdiff(10:1, 3L)) expect_error(convertIds(reg, c(2, 2)), "Duplicated ids") expect_error(convertIds(reg, as.character(1:3)), "not recognized") # issue #40 ids = ids[list(5:10), on = "job.id"][, "chunk" := chunk(job.id, chunk.size = 3)] ids = convertIds(reg, ids, keep.extra = c("job.id", "chunk")) expect_data_table(ids, 
any.missing = FALSE) }) ================================================ FILE: tests/testthat/test_count.R ================================================ test_that("count", { expect_identical(count(1:3), 3L) expect_identical(count(integer(0L)), 0L) expect_identical(count(list()), 0L) expect_identical(count(c(TRUE, NA, FALSE)), 2L) expect_identical(count(c(1L, NA, 3L)), 2L) expect_identical(count(c(1., NA, 3.)), 2L) expect_identical(count(c("a", NA, "c")), 2L) expect_identical(count(list(1, NULL, 3)), 2L) }) ================================================ FILE: tests/testthat/test_doJobCollection.R ================================================ test_that("doJobCollection handles bulky log output", { N = 1e5 reg = makeTestRegistry() fun = function(N) print(paste(rep("a", N), collapse = "")) batchMap(fun, N, reg = reg) jc = makeJobCollection(1, reg = reg) fn = fs::file_temp() doJobCollection(jc, output = fn) lines = readLines(fn) expect_true(any(nchar(lines) >= N)) fs::file_delete(fn) }) test_that("doJobCollection truncates error messages", { N = 5000 # R truncates stop() at 2^13 chars reg = makeTestRegistry() fun = function(N) stop(paste(rep("a", N), collapse = "")) batchMap(fun, N, reg = reg) jc = makeJobCollection(1, reg = reg) fn = fs::file_temp() doJobCollection(jc, output = fn) syncRegistry(reg = reg) msg = getErrorMessages(reg = reg)$message expect_true(stri_endswith_fixed(msg, " [truncated]")) fs::file_delete(fn) }) test_that("doJobCollection does not swallow warning messages", { reg = makeTestRegistry() reg$cluster.functions = makeClusterFunctionsInteractive(external = TRUE) fun = function(x) warning("GREPME") batchMap(fun, 1, reg = reg) submitAndWait(reg, 1) expect_data_table(findErrors(reg = reg), nrow = 0L) expect_data_table(grepLogs(pattern = "GREPME", reg = reg), nrow = 1L) }) test_that("doJobCollection signals slave errors", { reg = makeTestRegistry() fn = fs::file_temp(ext = ".R", tmp_dir = reg$temp.dir) reg$source = fn saveRegistry(reg) 
assign("y_on_master", 2, envir = .GlobalEnv)
  writeLines("x <- y_on_master", fn)
  # remove the variable again so that sourcing the file written above fails
  rm(y_on_master, envir = .GlobalEnv)
  expect_error(withr::with_dir(reg$work.dir, loadRegistryDependencies(reg, must.work = TRUE)), "y_on_master")
  batchMap(identity, 1, reg = reg)
  submitAndWait(reg, 1)
  expect_data_table(findErrors(reg = reg), nrow = 1)
  expect_string(getErrorMessages(reg = reg)$message, fixed = "y_on_master")
  fs::file_delete(fn)
})

================================================
FILE: tests/testthat/test_estimateRuntimes.R
================================================
# Runs a sample of jobs, overwrites their "done" timestamps with
# synthetic runtimes and checks the object returned by estimateRuntimes().
test_that("estimateRuntimes", {
  reg = makeTestExperimentRegistry()
  addProblem(name = "iris", data = iris, fun = function(data, ...) nrow(data), reg = reg)
  addAlgorithm(name = "nrow", function(instance, ...) nrow(instance), reg = reg)
  addAlgorithm(name = "ncol", function(instance, ...) ncol(instance), reg = reg)
  addExperiments(algo.designs = list(nrow = CJ(x = 1:50, y = letters[1:5])), reg = reg)
  addExperiments(algo.designs = list(ncol = CJ(x = 1:50, y = letters[1:5])), reg = reg)

  tab = unwrap(getJobPars(reg = reg))
  # draw 4 jobs from every problem/algorithm/y group
  ids = tab[, .SD[sample(nrow(.SD), 4)], by = c("problem", "algorithm", "y")]
  setkeyv(ids, "job.id")
  submitAndWait(reg, ids = s.chunk(ids))

  # "simulate" runtimes
  runtime = function(algorithm, x, y) {
    ifelse(algorithm == "nrow", 100L, 500L) + 100000L * (y %in% letters[1:2])
  }
  # shift "done" by the synthetic runtime of each submitted job
  reg$status[ids, done := done + tab[ids, runtime(algorithm, x, y), on = "job.id"], on = "job.id"]

  res = estimateRuntimes(tab, reg = reg)
  expect_is(res, "RuntimeEstimate")
  expect_set_equal(names(res), c("runtimes", "model"))
  expect_is(res$model, "ranger")
  expect_data_table(res$runtimes, key = "job.id", nrow = nrow(reg$status))
  expect_output(print(res), "Runtime Estimate for")
  # there is a problem with "null value passed as symbol address"
  # this has probably to do with rcpp/ranger
  # NB: only during R CMD check, not interactively
  # expect_output(print(res, n = 5), "Parallel")

  res = ijoin(res$runtimes, tab)
  res = res[, list(t = mean(runtime)), by = y]
  # y = "a" received the large synthetic offset; "c", "d", "e" did not
  expect_true(all(res[y == "a", t] > res[y %in% c("c", "d", "e"), t]))

  # remaining is suppressed if nothing more to submit, no error
  res = estimateRuntimes(unwrap(getJobPars(findDone(reg = reg), reg = reg)), reg = reg)
  expect_output(print(res, n = 2))
})

================================================
FILE: tests/testthat/test_export.R
================================================
# Round trip of batchExport(): export, list, unexport, and use of an
# exported object inside a job.
test_that("export works", {
  reg = makeTestRegistry()
  x = batchExport(list(exported_obj = 42L), reg = reg)
  expect_data_table(x, nrow = 1, ncol = 2)
  expect_set_equal(names(x), c("name", "uri"))
  expect_equal(x$name, "exported_obj")
  expect_file_exists(fs::path(reg$file.dir, "exports", mangle("exported_obj")))
  # loading the registry dependencies places the object in the global env
  withr::with_dir(reg$work.dir, loadRegistryDependencies(reg))
  expect_equal(get("exported_obj", envir = .GlobalEnv), 42L)

  # without arguments the current exports are listed
  x = batchExport(reg = reg)
  expect_data_table(x, nrow = 1, ncol = 2)
  expect_set_equal(names(x), c("name", "uri"))
  expect_equal(x$name, "exported_obj")

  # unexporting removes the entry and the file
  x = batchExport(unexport = "exported_obj", reg = reg)
  expect_data_table(x, nrow = 0, ncol = 2)
  expect_set_equal(names(x), c("name", "uri"))
  expect_false(fs::file_exists(fs::path(reg$file.dir, "exports", mangle("exported_obj"))))

  # a re-exported object must be visible to the job function
  x = batchExport(list(exported_obj = 43L), reg = reg)
  batchMap(function(x) exported_obj + x, 1L, reg = reg)
  submitAndWait(reg)
  expect_equal(loadResult(1, reg = reg), 44L)
  # clean up the object placed in the global environment above
  rm("exported_obj", envir = .GlobalEnv)
})

# Same round trip for a non-syntactic name (a user-defined infix operator).
test_that("export works with funny variable names", {
  reg = makeTestRegistry()
  x = batchExport(list(`%bla%` = function(x, y, ...) 42), reg = reg)
  expect_data_table(x, nrow = 1, ncol = 2)
  expect_set_equal(names(x), c("name", "uri"))
  expect_equal(x$name, "%bla%")
  expect_file_exists(fs::path(reg$file.dir, "exports", mangle("%bla%")))
  withr::with_dir(reg$work.dir, loadRegistryDependencies(reg))
  expect_function(get("%bla%", envir = .GlobalEnv))
  expect_equal(1 %bla% 2, 42)

  x = batchExport(unexport = "%bla%", reg = reg)
  expect_data_table(x, nrow = 0, ncol = 2)
  expect_set_equal(names(x), c("name", "uri"))
  expect_false(fs::file_exists(fs::path(reg$file.dir, "exports", mangle("%bla%"))))
  rm("%bla%", envir = .GlobalEnv)
})

================================================
FILE: tests/testthat/test_findConfFile.R
================================================
# findConfFile() must find "batchtools.conf.R" both in the working
# directory and in the directory named by R_BATCHTOOLS_SEARCH_PATH.
test_that("findConfFile", {
  d = fs::path_real(fs::path_temp())
  fn = fs::path(d, "batchtools.conf.R")
  fs::file_create(fn)
  withr::with_dir(d,
    expect_equal(findConfFile(), fs::path_abs(fn))
  )
  withr::with_envvar(list(R_BATCHTOOLS_SEARCH_PATH = d),
    expect_equal(findConfFile(), fs::path_abs(fn))
  )
  fs::file_delete(fn)
})

================================================
FILE: tests/testthat/test_findJobs.R
================================================
# shared expected value for "no jobs match"
none = noIds()

# Checks every find*() status helper in three states: empty registry,
# jobs defined but not submitted, and jobs finished (job 3 fails).
test_that("find[Status]", {
  reg = makeTestRegistry()

  # state 1: empty registry
  expect_equal(findJobs(reg = reg), none)
  expect_equal(findSubmitted(reg = reg), none)
  expect_equal(findNotSubmitted(reg = reg), none)
  expect_equal(findStarted(reg = reg), none)
  expect_equal(findNotStarted(reg = reg), none)
  expect_equal(findDone(reg = reg), none)
  expect_equal(findNotDone(reg = reg), none)
  expect_equal(findErrors(reg = reg), none)
  expect_equal(findOnSystem(reg = reg), none)
  expect_equal(findRunning(reg = reg), none)
  expect_equal(findQueued(reg = reg), none)
  expect_equal(findExpired(reg = reg), none)

  # state 2: five jobs defined, nothing submitted
  fun = function(i) if (i == 3) stop(i) else i
  ids = batchMap(fun, i = 1:5, reg = reg)
  all = reg$status[, "job.id"]

  expect_equal(findJobs(reg = reg), all)
  expect_equal(findSubmitted(reg = reg), none)
  expect_equal(findStarted(reg = reg), none)
  expect_equal(findNotStarted(reg = reg), all)
  expect_equal(findDone(reg = reg), none)
  expect_equal(findNotDone(reg = reg), all)
  expect_equal(findErrors(reg = reg), none)
  expect_equal(findOnSystem(reg = reg), none)
  expect_equal(findRunning(reg = reg), none)
  expect_equal(findQueued(reg = reg), none)
  expect_equal(findExpired(reg = reg), none)

  # state 3: all jobs have terminated; job 3 raised an error
  submitAndWait(reg, ids)

  expect_equal(findJobs(reg = reg), all)
  expect_equal(findSubmitted(reg = reg), all)
  expect_equal(findNotSubmitted(reg = reg), none)
  expect_equal(findStarted(reg = reg), all)
  expect_equal(findNotStarted(reg = reg), none)
  expect_equal(findDone(reg = reg), all[-3L])
  expect_equal(findNotDone(reg = reg), all[3L])
  expect_equal(findErrors(reg = reg), all[3L])
  expect_equal(findOnSystem(reg = reg), none)
  expect_equal(findRunning(reg = reg), none)
  expect_equal(findQueued(reg = reg), none)
  expect_equal(findExpired(reg = reg), none)
})

# The ids argument restricts the search to the given jobs.
test_that("Subsetting", {
  reg = makeTestRegistry()
  fun = function(i) if (i == 3) stop(i) else i
  ids = batchMap(fun, i = 1:5, reg = reg)
  submitAndWait(reg, ids)
  all = reg$status[, "job.id"]

  expect_equal(findJobs(ids = 1:3, reg = reg), all[1:3])
  expect_equal(findDone(ids = 3, reg = reg), none)
  expect_equal(findErrors(ids = 1:2, reg = reg), none)
  expect_equal(findSubmitted(1:5, reg = reg), all)
  # an id that is not in the registry yields an empty table
  expect_data_table(findSubmitted(6, reg = reg), ncol = 1L, nrow = 0L)
})

# findJobs() accepts an expression over the job parameters, which may
# also refer to variables of the calling scope (xi).
test_that("findJobs", {
  reg = makeTestRegistry()
  fun = function(i, j) i + j
  ids = batchMap(fun, i = 1:5, j = c(2, 2, 3, 4, 4), reg = reg)
  all = reg$status[, "job.id"]
  expect_equal(findJobs(i == 1, reg = reg), all[1])
  expect_equal(findJobs(i >= 3, reg = reg), all[3:5])
  expect_equal(findJobs(i >= 3 & j > 3, reg = reg), all[4:5])
  xi = 2
  expect_equal(findJobs(i == xi, reg = reg), all[2])
})

# While the sleeping jobs are active they must be reported as on the
# system and not as expired; skipped without listJobsRunning.
test_that("findOnSystem", {
  reg = makeTestRegistry()
  if (is.null(reg$cluster.functions$listJobsRunning))
    skip("Test requires listJobsRunning")

  silent({
    ids = batchMap(reg = reg, Sys.sleep, c(10, 10))
    submitJobs(reg = reg, ids = s.chunk(ids))
    expect_equal(findOnSystem(reg = reg), findJobs(reg = reg))
    expect_equal(findExpired(reg = reg), noIds())
    # ensure that the registry is not removed before jobs have finished
    waitForJobs(reg = reg, sleep = 1)
  })
})

# findExperiments() filters by problem/algorithm name, pattern,
# parameters and replication. The design is 2 problems (p1 with two n
# values, p2 with none) x 3 sq values x 10 replications = 90 jobs.
test_that("findExperiments", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", fun = function(job, data, n, ...) mean(runif(n)), seed = 42)
  prob = addProblem(reg = reg, "p2", data = iris, fun = function(job, data) nrow(data))
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, sq) instance^sq)
  prob.designs = list(p1 = data.table(n = c(10, 20)), p2 = data.table())
  algo.designs = list(a1 = data.table(sq = 1:3))
  repls = 10
  addExperiments(prob.designs, algo.designs, repls = repls, reg = reg)

  tab = findExperiments(reg = reg)
  expect_data_table(tab, nrow = 90, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.name = "p1")
  expect_data_table(tab, nrow = 60, ncol = 1, key = "job.id")

  # prob.name must be a single string
  expect_error(findExperiments(reg = reg, prob.name = c("p1", "p2")), "length 1")

  tab = findExperiments(reg = reg, prob.pattern = "p.")
  expect_data_table(tab, nrow = 90, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.pattern = "2$")
  expect_data_table(tab, nrow = 30, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.pattern = "p1", algo.pattern = "a1", repls = 3:4)
  expect_data_table(tab, nrow = 12, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.pattern = c("^p"))
  expect_data_table(tab, nrow = 90, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.name = "p2")
  expect_data_table(tab, nrow = 30, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, ids = 1:10, prob.name = "p1")
  expect_data_table(tab, nrow = 10, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, algo.name = "a1")
  expect_data_table(tab, nrow = 90, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.name = "p1", prob.pars = n == 10)
  expect_data_table(tab, nrow = 30, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, algo.pars = sq == 2)
  expect_data_table(tab, nrow = 30, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, algo.name = "a1")
  expect_data_table(tab, nrow = 90, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, algo.pattern = "a.")
  expect_data_table(tab, nrow = 90, ncol = 1, key = "job.id")

  # names are matched exactly: a prefix of a name matches nothing
  tab = findExperiments(reg = reg, prob.name = "p")
  expect_data_table(tab, nrow = 0, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, algo.name = "a")
  expect_data_table(tab, nrow = 0, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, prob.name = "xxx")
  expect_data_table(tab, nrow = 0, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, algo.name = "xxx")
  expect_data_table(tab, nrow = 0, ncol = 1, key = "job.id")

  tab = findExperiments(reg = reg, repls = 1:2)
  expect_data_table(tab, nrow = 18, ncol = 1, key = "job.id")
})

================================================
FILE: tests/testthat/test_foreach.R
================================================
# Job functions may use foreach's %dopar%; this test requests the "seq"
# backend through the foreach.backend resource.
test_that("foreach/seq", {
  skip_if_not_installed("foreach")
  reg = makeTestRegistry(packages = "foreach")
  fun = function(i) {
    foreach(j = 1:i, .combine = c) %dopar% { j^2 }
  }
  ids = batchMap(fun, i = 1:2, reg = reg)
  submitAndWait(reg, ids = ids, resources = list(foreach.backend = "seq", ncpus = 2))
  expect_equal(nrow(findDone(reg = reg)), 2L)
  expect_equal(reduceResultsList(reg = reg), list(1, c(1, 4)))
})

# With the "parallel" backend and ncpus = 2, two 3-second sleeps are
# started inside one job; the runtime of that job is checked afterwards.
test_that("foreach/multicore", {
  skip_if_not_installed("foreach")
  skip_if_not_installed("doParallel")
  reg = makeTestRegistry(packages = "foreach")
  if (reg$cluster.functions$name %chin% c("Parallel", "Socket"))
    skip("Nested local parallelization not supported")

  fun = function(i) {
    foreach(j = 1:2) %dopar% { Sys.sleep(3); i }
  }
  ids = batchMap(fun, i = 1, reg = reg)
  submitAndWait(reg, ids = ids, resources = list(foreach.backend = "parallel", ncpus = 2))
  expect_equal(nrow(findDone(reg = reg)), 1L)
  status = getJobStatus(reg = reg)
expect_true(status$time.running < 5.9)
  expect_equal(reduceResultsList(reg = reg), list(as.list(c(1, 1))))
})

================================================
FILE: tests/testthat/test_future.R
================================================
# Smoke test for the future.batchtools backend: the future must be
# evaluated in a process other than the current one.
test_that("futures work", {
  skip_if_not_installed("future.batchtools")
  # only set a cache path if the environment does not already provide one
  path = Sys.getenv("R_FUTURE_CACHE_PATH")
  if (!nzchar(path))
    Sys.setenv(R_FUTURE_CACHE_PATH = fs::path(fs::path_temp(), ".future"))

  library("future")
  library("future.batchtools")
  plan(batchtools_local)
  pid %<-% { Sys.getpid() }
  expect_count(pid)
  expect_false(pid == Sys.getpid())
})

================================================
FILE: tests/testthat/test_getErrorMessages.R
================================================
# getErrorMessages() returns one row per requested job with the columns
# job.id, terminated, error and message. Only jobs 1-4 are submitted, so
# job 5 never terminates; job 3 fails with "foobar".
test_that("getErrorMessages", {
  reg = makeTestRegistry()
  fun = function(i) if (i == 3) stop("foobar") else i
  ids = batchMap(fun, i = 1:5, reg = reg)
  submitAndWait(reg, 1:4)

  # default: the job that did not terminate is not reported as an error
  tab = getErrorMessages(ids, reg = reg)
  expect_data_table(tab, nrow = 5, ncol = 4, key = "job.id")
  expect_set_equal(names(tab), c("job.id", "terminated", "error", "message"))
  expect_identical(tab$job.id, 1:5)
  expect_equal(tab$terminated, c(rep(TRUE, 4), FALSE))
  expect_equal(tab$error, replace(logical(5), 3, TRUE))
  expect_character(tab$message)
  expect_equal(is.na(tab$message), !replace(logical(5), 3, TRUE))
  expect_string(tab$message[3], fixed = "foobar")

  # missing.as.error = TRUE: job 5 is flagged too, with a placeholder message
  tab = getErrorMessages(ids, missing.as.error = TRUE, reg = reg)
  expect_data_table(tab, nrow = 5, ncol = 4, key = "job.id")
  expect_set_equal(names(tab), c("job.id", "terminated", "error", "message"))
  expect_identical(tab$job.id, 1:5)
  expect_equal(tab$terminated, c(rep(TRUE, 4), FALSE))
  expect_equal(tab$error, replace(logical(5), c(3, 5), TRUE))
  expect_character(tab$message)
  expect_equal(is.na(tab$message), !replace(logical(5), c(3, 5), TRUE))
  expect_string(tab$message[3], fixed = "foobar")
  expect_string(tab$message[5], fixed = "[not terminated]")
})

================================================
FILE:
tests/testthat/test_getJobTable.R
================================================
# Checks the shape and column types of getJobTable() before and after
# submission, and of getJobResources() afterwards.
test_that("getJobTable.Registry", {
  reg = makeTestRegistry()
  fun = function(i, j) i + j
  ids = batchMap(fun, i = 1:4, j = rep(1, 4), reg = reg)

  # before submission: parameters are stored as a list column "job.pars"
  tab = getJobTable(reg = reg)
  expect_data_table(tab, nrows = 4, ncols = 15, key = "job.id")
  expect_list(tab$job.pars)
  expect_equal(tab$job.pars[[1]], list(i = 1L, j = 1))

  # unwrap() replaces the list column with one column per parameter
  tab = unwrap(tab)
  expect_data_table(tab, nrows = 4, ncols = 15, key = "job.id")
  expect_null(tab[["job.pars"]])
  expect_equal(tab$i, 1:4)
  expect_equal(tab$j, rep(1, 4))
  expect_is(tab$submitted, "POSIXct")
  expect_is(tab$started, "POSIXct")
  expect_is(tab$done, "POSIXct")
  expect_is(tab$time.queued, "difftime")
  expect_numeric(tab$time.queued, lower = 0)
  expect_is(tab$time.running, "difftime")
  expect_numeric(tab$time.running, lower = 0)
  expect_character(tab$tags)
  expect_true(allMissing(tab$tags))

  # with sep, the new columns are prefixed with the list column's name
  tab = unwrap(getJobTable(reg = reg), sep = ".")
  expect_null(tab[["job.pars"]])
  expect_equal(tab$job.pars.i, 1:4)
  expect_equal(tab$job.pars.j, rep(1, 4))

  # be sure that the original tables are untouched
  checkTables(reg)

  # after submission with a resource and tags on jobs 2 and 3
  submitAndWait(reg = reg, ids = s.chunk(ids), resources = list(my.walltime = 42L))
  addJobTags(2:3, "my_tag", reg = reg)

  tab = getJobTable(reg = reg)
  expect_data_table(tab, key = "job.id")
  expect_copied(tab, reg$status)
  expect_is(tab$submitted, "POSIXct")
  expect_is(tab$started, "POSIXct")
  expect_is(tab$done, "POSIXct")
  expect_is(tab$time.queued, "difftime")
  expect_numeric(tab$time.queued, lower = 0)
  expect_is(tab$time.running, "difftime")
  expect_numeric(tab$time.running, lower = 0)
  expect_character(tab$tags, min.len = 1L)

  # all jobs were submitted with the same resources, hence a single hash
  tab = getJobResources(reg = reg)
  expect_data_table(tab, nrow = 4, ncols = 2, key = "job.id")
  expect_copied(tab, reg$resources)
  expect_set_equal(tab$resource.hash[1], tab$resource.hash)
  expect_list(tab$resources)
  expect_true(all(vlapply(tab$resources, function(r) r$my.walltime == 42)))

  tab = unwrap(getJobResources(reg = reg))
  expect_null(tab[["resources"]])
expect_integer(tab$my.walltime, any.missing = FALSE)
})

# getJobPars() returns job.id plus a parameter list column; unwrap()
# expands it into one column per parameter, optionally prefixed.
test_that("getJobPars", {
  reg = makeTestRegistry()
  fun = function(i, j) i + j
  ids = batchMap(fun, i = 1:4, j = rep(1, 4), reg = reg)
  tab = getJobPars(reg = reg)
  expect_data_table(tab, nrow = 4, ncol = 2, key = "job.id")
  tab = unwrap(tab)
  expect_copied(tab, reg$defs)
  expect_null(tab$job.pars)
  expect_equal(tab$i, 1:4)
  expect_equal(tab$j, rep(1, 4))

  # restricting to a subset of ids
  tab = unwrap(getJobPars(reg = reg, ids = 1:2))
  expect_data_table(tab, nrow = 2, ncol = 3, key = "job.id")

  tab = unwrap(getJobPars(reg = reg), sep = ".")
  expect_data_table(tab, nrow = 4, ncol = 3, key = "job.id")
  expect_equal(tab$job.pars.i, 1:4)
  expect_equal(tab$job.pars.j, rep(1, 4))
})

# With replications, getJobPars() must return one row per job.
test_that("getJobPars with repls", {
  reg = makeTestExperimentRegistry()
  prob = addProblem("prob", data = iris, fun = function(data, job) nrow(data), reg = reg)
  algo = addAlgorithm("algo", fun = function(job, data, instance, i, ...) instance, reg = reg)
  prob.designs = list(prob = data.table())
  algo.designs = list(algo = data.table(i = 1:2))
  ids = addExperiments(prob.designs, algo.designs, repls = 3, reg = reg)
  # NOTE(review): nothing has been submitted at this point, so this call
  # presumably returns immediately -- confirm whether it is needed
  waitForJobs(reg = reg, sleep = 1)
  ids[, chunk := chunk(job.id, chunk.size = 2)]
  submitAndWait(ids = ids, reg = reg)
  expect_equal(nrow(getJobPars(reg = reg)), nrow(ids))
})

# For an experiment registry the job table has separate prob.pars and
# algo.pars list columns and no job.pars column.
test_that("getJobTable.ExperimentRegistry", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data) nrow(data), seed = 42)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, sq) instance^sq)
  ids = addExperiments(list(p1 = data.table(k = 1)), list(a1 = data.table(sq = 1:3)), reg = reg)

  tab = getJobTable(reg = reg)
  expect_data_table(tab, nrows = 3, ncols = 19, key = "job.id")
  expect_copied(tab, reg$status)
  expect_null(tab$job.pars)
  expect_list(tab$prob.pars)
  expect_list(tab$algo.pars)
  for (i in 1:3) {
    expect_equal(tab$prob.pars[[i]], list(k = 1))
    expect_equal(tab$algo.pars[[i]], list(sq = i))
  }
  expect_equal(tab$problem[1], "p1")
expect_equal(tab$algorithm[1], "a1")

  # unwrapping both list columns yields the parameter columns k and sq
  tab = unwrap(getJobTable(ids = 1:3, reg = reg), c("prob.pars", "algo.pars"))
  expect_data_table(tab, nrows = 3, ncols = 19, key = "job.id")
  expect_null(tab[["job.pars"]])
  expect_set_equal(tab$k, rep(1, 3))
  expect_set_equal(tab$sq, 1:3)

  tab = unwrap(getJobPars(reg = reg), sep = ".")
  expect_null(tab[["job.pars"]])
  expect_set_equal(tab$prob.pars.k, rep(1, 3))
  expect_set_equal(tab$algo.pars.sq, 1:3)
})

# Parameters may themselves be vectors (n is a list column holding 100
# and 1:4); unwrap() is applied twice and the column counts are checked.
test_that("experiment registry with vector parameters", {
  tmp = makeTestExperimentRegistry()
  fun = function(job, data, n, mean, sd, ...) rnorm(sum(n), mean = mean, sd = sd)
  addProblem("rnorm", fun = fun, reg = tmp)
  fun = function(instance, ...) sd(instance)
  addAlgorithm("deviation", fun = fun, reg = tmp)
  prob.designs = algo.designs = list()
  prob.designs$rnorm = data.table(expand.grid(n = list(100, 1:4), mean = 0, sd = 1:2))
  algo.designs$deviation = data.table()
  addExperiments(prob.designs, algo.designs, reg = tmp)
  submitAndWait(reg = tmp)

  res = getJobPars(reg = tmp)
  expect_data_table(res, ncol = 5)
  expect_list(res$prob.pars, len = 4)

  # first unwrap: n remains a list column, mean and sd become numeric
  res = unwrap(res)
  expect_data_table(res, ncol = 6, nrow = 4, col.names = "unique")
  expect_list(res$n, len = 4)
  expect_numeric(res$mean, len = 4, any.missing = FALSE)
  expect_numeric(res$sd, len = 4, any.missing = FALSE)

  # second unwrap expands the remaining list column as well
  res = unwrap(res)
  expect_data_table(res, ncol = 9, nrow = 4, col.names = "unique")
})

================================================
FILE: tests/testthat/test_getStatus.R
================================================
# getStatus() must return a one-row integer table of counts. 10 jobs are
# defined, 5 submitted, and job 4 fails.
test_that("getStatus", {
  reg = makeTestRegistry()
  fun = function(i) if (i == 4) stop("4!") else i
  ids = batchMap(fun, i = 1:10, reg = reg)
  submitAndWait(reg, 1:5)

  stat = getStatus(reg = reg)
  expect_data_table(stat, any.missing = FALSE, types = "integer", nrows = 1L)
  expect_equal(stat$defined, 10L)
  expect_equal(stat$submitted, 5L)
  expect_equal(stat$started, 5L)
  expect_equal(stat$done, 4L)
  expect_equal(stat$error, 1L)
  expect_equal(stat$queued, 0L)
  expect_equal(stat$running, 0L)
  expect_equal(stat$system, 0L)
  expect_equal(stat$expired, 0L)
  expect_output(print(stat), "Status for 10 jobs")
})

================================================
FILE: tests/testthat/test_grepLogs.R
================================================
# Shared fixture for the test below: five jobs writing a marker via
# print() (x = 1), cat() (x = 2) or stderr/message (x >= 3). Jobs 1 and 2
# share a chunk; only jobs 1-4 are submitted.
silent({
  reg = makeTestRegistry()
  ids = batchMap(reg = reg, function(x) {
    if (x == 1) {
      print("FOOBAR: AAA")
    } else if (x == 2) {
      cat("FOOBAR: BBB")
    } else {
      if (identical(Sys.getenv("TESTTHAT"), "true")) {
        # testthat uses muffle restarts which breaks our internal
        # sink() somehow.
        # https://github.com/r-lib/testthat/issues/460
        cat("FOOBAR: CCC", file = stderr())
      } else {
        message("FOOBAR: CCC")
      }
    }
    invisible(NULL)
  }, x = 1:5)
  ids$chunk = as.integer(c(1, 1, 2, 3, 4))
  submitAndWait(reg, ids[1:4])
})

# getLog() must return only the log of the requested job, even when jobs
# share a chunk; grepLogs() must honour ids, ignore.case and fixed.
test_that("grepLogs", {
  expect_true(any(grepl("AAA", getLog(1, reg = reg))))
  expect_true(any(grepl("BBB", getLog(2, reg = reg))))
  expect_true(any(grepl("CCC", getLog(3, reg = reg))))
  # jobs 1 and 2 ran in the same chunk; job 2's log must not contain job 1's output
  expect_false(any(grepl("AAA", getLog(2, reg = reg))))

  expect_data_table(grepLogs(pattern = "FOOBAR", reg = reg), ncol = 2, key = "job.id")
  expect_equal(grepLogs(pattern = "FOOBAR", reg = reg)$job.id, 1:4)
  expect_equal(grepLogs(pattern = "XXX", reg = reg)$job.id, integer(0L))
  # invalid patterns are rejected
  expect_error(grepLogs(pattern = "", reg = reg), "at least")
  expect_error(grepLogs(pattern = NA, reg = reg), "not be NA")

  expect_equal(grepLogs(pattern = "AAA", reg = reg)$job.id, 1L)
  expect_equal(grepLogs(pattern = "BBB", reg = reg)$job.id, 2L)
  expect_equal(grepLogs(pattern = "CCC", reg = reg)$job.id, 3:4)

  # matching is case sensitive unless ignore.case = TRUE
  expect_equal(grepLogs(pattern = "aaa", reg = reg)$job.id, integer(0L))
  expect_equal(grepLogs(pattern = "aaa", ignore.case = TRUE, reg = reg)$job.id, 1L)

  # the pattern is a regular expression unless fixed = TRUE
  expect_data_table(grepLogs(pattern = "F..BAR", reg = reg), ncol = 2, nrow = 4, key = "job.id")
  expect_data_table(grepLogs(pattern = "F..BAR", fixed = TRUE, reg = reg), ncol = 2, nrow = 0, key = "job.id")

  # restricting to ids whose logs do not match, or to the unsubmitted job 5
  expect_data_table(grepLogs(1:2, pattern = "CCC", reg = reg), nrow = 0, ncol = 2)
  expect_data_table(grepLogs(5, pattern = "CCC", reg = reg), nrow = 0, ncol = 2)
})

================================================
FILE: tests/testthat/test_hooks.R
================================================
# Installs a pre.do.collection hook (prints the job hash, checked via the
# job log) and a post.sync hook (writes a marker file, checked via the
# file's existence).
test_that("hooks", {
  reg = makeTestRegistry()
  if (!is.null(reg$cluster.functions$hooks$pre.do.collection) || !is.null(reg$cluster.functions$hooks$post.sync))
    skip("Hooks already defined by Cluster Functions")
  reg$cluster.functions$hooks = insert(reg$cluster.functions$hooks, list(
    "pre.do.collection" = function(jc, ...) cat(jc$job.hash, "\n", sep = ""),
    "post.sync" = function(reg, ...) cat("post.syn", file = fs::path(reg$file.dir, "post.sync.txt"))
  ))

  # the hook must be carried over into the job collection
  jc = makeJobCollection(1, reg = reg)
  expect_function(jc$hooks$pre.do.collection, args = "jc")

  # the marker file must not exist before the sync and must exist after it
  fn.ps = fs::path(reg$file.dir, "post.sync.txt")
  expect_false(fs::file_exists(fn.ps))

  batchMap(identity, 1, reg = reg)
  submitAndWait(reg, 1)
  syncRegistry(reg = reg)
  expect_true(fs::file_exists(fn.ps))
  # the job hash printed by pre.do.collection must appear as a log line
  lines = getLog(1, reg = reg)
  expect_true(reg$status[1]$job.hash %chin% lines)
})

================================================
FILE: tests/testthat/test_joins.R
================================================
# Exercises the join helpers on x (jobs 1-5 with parameter x) and y
# (jobs 2-5 plus "extra.col"). Inputs may be data.frames; results are
# data.tables keyed by job.id.
test_that("joins", {
  reg = makeTestRegistry()
  batchMap(identity, x = 1:6, reg = reg)
  x = unwrap(getJobPars(reg = reg)[1:5])
  y = findJobs(x >= 2 & x <= 5, reg = reg)
  y$extra.col = head(letters, nrow(y))

  # inner join: only jobs present in both tables
  res = ijoin(x, y)
  expect_data_table(res, key = "job.id", ncol = 3, any.missing = FALSE)
  expect_identical(res$job.id, 2:5)
  expect_copied(res, x)

  # left join: all rows of x, extra.col missing for job 1
  res = ljoin(as.data.frame(x), y)
  expect_data_table(res, key = "job.id", ncol = 3)
  expect_identical(res$job.id, 1:5)
  expect_true(anyMissing(res$extra.col))
  expect_copied(res, x)

  # right join: all rows of the second table
  res = rjoin(as.data.frame(x), y)
  expect_data_table(res, key = "job.id", ncol = 3, any.missing = FALSE)
  expect_identical(res$job.id, 2:5)
  expect_copied(res, x)

  res = rjoin(y, x)
  expect_data_table(res, key = "job.id", ncol = 3)
  expect_identical(res$job.id, 1:5)
  expect_true(anyMissing(res$extra.col))
  expect_copied(res, x)

  # outer join: jobs present in either table
  res = ojoin(x, y)
expect_data_table(res, key = "job.id", ncol = 3)
  expect_identical(res$job.id, 1:5)
  expect_true(anyMissing(res$extra.col))
  expect_copied(res, x)

  # semi join: rows with a match in the other table, without its columns
  res = sjoin(x, y)
  expect_data_table(res, key = "job.id", ncol = 2, any.missing = FALSE)
  expect_identical(res$job.id, 2:5)
  expect_copied(res, x)

  res = sjoin(y, x)
  expect_data_table(res, key = "job.id", ncol = 2, any.missing = FALSE)
  expect_identical(res$job.id, 2:5)
  expect_copied(res, x)

  # anti join: rows of x without a match in y
  res = ajoin(x, y)
  expect_data_table(res, key = "job.id", ncol = 2, any.missing = FALSE)
  expect_identical(res$job.id, 1L)
  expect_copied(res, x)

  # plain data.frames holding only job.id are accepted on either side
  res = ijoin(x, data.frame(job.id = 2:4))
  expect_data_table(res, key = "job.id", ncol = 2, any.missing = FALSE)
  expect_identical(res$job.id, 2:4)
  expect_copied(res, x)

  res = ijoin(data.frame(job.id = 2:4), x)
  expect_data_table(res, key = "job.id", ncol = 2, any.missing = FALSE)
  expect_identical(res$job.id, 2:4)
  expect_copied(res, x)

  res = ajoin(as.data.frame(x), y)
  expect_data_table(res, key = "job.id", ncol = 2, any.missing = FALSE)
  expect_identical(res$job.id, 1L)
  expect_copied(res, x)

  # update join: y shares no value column with x here, so x is unchanged
  res = ujoin(x, y)
  expect_equivalent(res, x)
  expect_copied(res, x)

  # update join with a shared column "x": matched rows take the values of yy
  yy = copy(y)
  yy$x = 10:13
  res = ujoin(x, yy)
  expect_data_table(res, key = "job.id", ncol = ncol(x), any.missing = FALSE)
  expect_identical(res$job.id, 1:5)
  expect_identical(res$x, c(1L, 10:13))
  expect_copied(res, x)

  # all.y = TRUE additionally brings in the columns that exist only in yy
  res = ujoin(x, yy, all.y = TRUE)
  expect_data_table(res, key = "job.id", ncol = 3)
  expect_identical(res$job.id, 1:5)
  expect_identical(res$x, c(1L, 10:13))
  expect_identical(res$extra.col, c(NA, letters[1:4]))
  expect_copied(res, x)
})

# guessBy() must fail when no join column can be determined or when the
# given one is missing from a table, and must pass through an explicit
# (optionally named) `by`.
test_that("guessBy", {
  x = data.frame(id = 1:3, x = 1:3)
  y = data.frame(jid = 1:3, y = 3:1)

  expect_error(guessBy(x, y), "explicitly")
  expect_error(guessBy(x, y, by = "id"), "subset of")
  # a named `by` maps column "id" of x to column "jid" of y
  by = guessBy(x, y, by = c(id = "jid"))
  expect_equal(unname(by), "jid")
  expect_equal(names(by), "id")

  # once both tables have an "id" column, an unnamed `by` is accepted
  y$id = y$jid
  by = guessBy(x, y, by = "id")
  expect_equal(unname(by), "id")
  expect_equal(names(by), NULL)
})
================================================
FILE: tests/testthat/test_killJobs.R
================================================
# Submits one long-sleeping job and kills it. Skipped if the cluster functions
# do not provide a killJob implementation.
test_that("killJobs", {
  reg = makeTestRegistry()
  if (is.null(reg$cluster.functions$killJob))
    skip("Test requires killJobs")
  ids = batchMap(Sys.sleep, time = 60, reg = reg)
  silent(submitJobs(1, reg = reg))
  expect_equal(findOnSystem(1, reg = reg), findJobs(reg = reg))

  # Remember the batch id before killing; the result table must report the same id.
  batch.id = reg$status[1, batch.id]
  silent({
    res = killJobs(1, reg = reg)
  })
  expect_equal(res$job.id, 1L)
  expect_equal(res$batch.id, batch.id)
  expect_true(res$killed)
})

================================================
FILE: tests/testthat/test_manual.R
================================================
# Manual check of the multicore scheduling with 4 workers; always skipped in
# automated runs via skip("manual test").
test_that("rscimark", {
  skip("manual test")
  reg = makeTestRegistry(package = "rscimark")
  reg$cluster.functions = makeClusterFunctionsMulticore(4)
  batchMap(rscimark, minimum.time = rep(1, 5), reg = reg)
  submitJobs(reg = reg)
  waitForJobs(reg = reg, sleep = 1)
  tab = getJobTable(reg = reg)
  # With 5 jobs on 4 workers, the fifth job may only start after one of the first four is done.
  expect_true(tab$started[5] >= min(tab$done[1:4]))

  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsMulticore(4)
  batchMap(Sys.sleep, rep(3, 4), reg = reg)
  submitJobs(reg = reg)
  waitForJobs(reg = reg, sleep = 1)
  tab = getJobTable(reg = reg)
  # Four jobs on four workers: start and end times must lie within 2 time units of each other.
  expect_true(all(as.numeric(diff(range(tab$started))) <= 2))
  expect_true(all(as.numeric(diff(range(tab$done))) <= 2))
})

================================================
FILE: tests/testthat/test_memory.R
================================================
# Runs two jobs with very different matrix sizes via SSH cluster functions on
# localhost with resource measure.memory = TRUE and compares the recorded
# memory usage. Skipped on Windows, CI and CRAN.
test_that("memory measurements work", {
  skip_on_os("windows")
  skip_on_ci()
  skip_on_cran()
  reg = makeTestRegistry()
  reg$cluster.functions = makeClusterFunctionsSSH(list(Worker$new("localhost")))
  ids = batchMap(function(n) { m = matrix(runif(n), nrow = 10); m %*% t(m) }, n = c(100, 1e7), reg = reg)
  submitAndWait(reg, 1:2, resources = list(measure.memory = TRUE))

  expect_true(any(stri_detect_fixed(readLog(1L, reg = reg)$lines, "Memory measurement enabled")))
  expect_numeric(reg$status$mem.used, any.missing = FALSE)
  # The job with n = 1e7 must have used more memory than the job with n = 100.
  expect_true(reg$status$mem.used[2] > reg$status$mem.used[1])
})

================================================
FILE: tests/testthat/test_mergeRegistries.R
================================================
# Copies a registry to a temporary location, computes disjoint sets of jobs in
# the original ("target") and the copy ("source"), merges source into target
# and checks status tables, results and external directories.
test_that("mergeRegistries", {
  target = makeTestRegistry()
  # Jobs 2 and 7 additionally create a file "foo" in their external directory.
  f = function(.job, x) { if (x %in% c(2, 7)) fs::file_create(fs::path(.job$external.dir, "foo")); x^2 }
  batchMap(f, 1:10, reg = target)

  td = fs::path(target$temp.dir, fs::path_file(fs::file_temp()))
  fs::dir_create(td)
  file.copy(target$file.dir, td, recursive = TRUE)
  file.dir = fs::path(td, fs::path_file(target$file.dir))
  # FIXME: dir_copy?
  source = loadRegistry(file.dir, writeable = TRUE, make.default = FALSE)

  submitAndWait(target, data.table(job.id = 1:4, chunk = 1L))
  submitAndWait(source, data.table(job.id = 6:9, chunk = c(1L, 1L, 1L, 2L)))
  expect_data_table(findDone(reg = source), nrow = 4)
  expect_data_table(findDone(reg = target), nrow = 4)

  # Merging must leave the source untouched and add its 4 finished jobs to the target.
  mergeRegistries(source, target)
  expect_data_table(findDone(reg = source), nrow = 4)
  expect_data_table(findDone(reg = target), nrow = 8)
  checkTables(target)
  expect_set_equal(list.files(dir(target, "external")), as.character(c(2, 7)))
  expect_equal(unwrap(reduceResultsDataTable(reg = target))$result.1, c(1,2,3,4,6,7,8,9)^2)
  expect_file_exists(fs::path(target$file.dir, "external", c("2", "7"), "foo"))
  fs::dir_delete(td)
})

================================================
FILE: tests/testthat/test_parallelMap.R
================================================
# Shared fixture: four jobs, each of which calls parallelMap::parallelMap() itself.
silent({
  reg = makeTestRegistry()
  fun = function(i) { fun = function(i) i^2; parallelMap::parallelMap(fun, 1:i)}
  ids = batchMap(fun, i = 1:4, reg = reg)
})

# Nested parallelization inside jobs using the parallelMap "multicore" backend.
test_that("pm/multicore", {
  skip_on_os("windows")
  skip_if_not_installed("parallelMap")
  skip_on_ci()
  if (reg$cluster.functions$name %chin% c("Parallel", "Socket"))
    skip("Nested local parallelization not supported")
  submitAndWait(reg, ids = ids, resources = list(pm.backend = "multicore", ncpus = 2))
  expect_equal(nrow(findDone(reg = reg)), 4L)
})

# Nested parallelization inside jobs using the parallelMap "socket" backend.
test_that("pm/socket", {
  skip_if_not_installed("parallelMap")
  skip_if_not_installed("snow")
  skip_on_ci()
  if (reg$cluster.functions$name %chin% c("Parallel", "Socket"))
    skip("Nested local parallelization not supported")
  submitAndWait(reg, ids = ids, resources = list(pm.backend = "socket", ncpus = 2))
  expect_equal(nrow(findDone(reg = reg)), 4L)
})

# The other direction: parallelMap using batchtools as its backend.
test_that("parallelMap works with batchtools", {
  skip_if_not_installed("parallelMap")
  skip_if_not(packageVersion("parallelMap") >= "1.4")
  requireNamespace("parallelMap")
  dir = reg$temp.dir %??% fs::path_temp()
  parallelMap::parallelStartBatchtools(storagedir = dir, show.info = FALSE)
  # Re-read the directory from the option (read before parallelStop()) so it can be removed afterwards.
  dir = getOption("parallelMap.bt.reg.filedir")
  res = parallelMap::parallelMap(function(x, y) x + y, x = 1:2, y = 1)
  parallelMap::parallelStop()
  if (fs::dir_exists(dir)) fs::dir_delete(dir)
  expect_equal(res, list(2, 3))
})

# test_that("pm/mpi", {
#   skip_on_os("mac")
#   skip_on_cran()
#   skip_if_not_installed("parallelMap")
#   skip_if_not_installed("snow")
#   skip_if_not_installed("Rmpi")
#   skip_on_ci()
#   if (reg$cluster.functions$name %chin% c("Parallel", "Socket"))
#     skip("Nested local parallelization not supported")
#   submitAndWait(reg, ids = ids, resources = list(pm.backend = "mpi", ncpus = 2))
#   expect_equal(nrow(findDone(reg = reg)), 4)
# })

================================================
FILE: tests/testthat/test_reduceResults.R
================================================
# Shared fixture: four jobs returning their arguments as a list; only jobs 1-3
# are submitted, job 4 stays without a result.
suppressMessages({
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, a = 1:4, b = 4:1, reg = reg)
  submitAndWait(reg, 1:3)
})

# Results of finished jobs can be loaded; the unsubmitted job 4 raises an error.
test_that("loadResult", {
  expect_equal(loadResult(reg = reg, 1), list(a = 1, b = 4))
  expect_equal(loadResult(reg = reg, 2), list(a = 2, b = 3))
  expect_error(loadResult(reg = reg, 4), "not terminated")
})

# Maps over the results of `reg` (source) into a new registry (target).
test_that("batchMapResults", {
  target = makeTestRegistry()
  x = batchMapResults(target = target, function(x, c, d) x$a+x$b + c + d, c = 11:13, source = reg, more.args = list(d = 2))
  expect_data_table(x, nrow = 3, key = "job.id")
  expect_data_table(target$status, nrow = 3)
  submitAndWait(target)
  res = unwrap(reduceResultsDataTable(reg = target))
  # a + b is 5 for every job of the fixture, plus c (11:13) plus d (2).
  expect_equal(res[[2L]], 11:13 + rep(5, 3) + 2)
})

# reduceResults(): aggregation with init, extra arguments, access to the job
# object, ordering by `ids`, and the error for jobs without results.
test_that("reduceResults", {
  silent({
    expect_equal(reduceResults(fun = function(aggr, res, ...) c(aggr, res$a), init = integer(0), reg = reg), 1:3)
    expect_equal(reduceResults(ids = 1, fun = c, reg = reg), list(a = 1, b = 4))
    expect_equal(reduceResults(ids = 1, fun = c, list(c = 1), reg = reg)$c, 1)
    expect_equal(reduceResults(fun = function(aggr, res, extra.arg, ...) aggr + res$a + extra.arg, init = 0, extra.arg = 1, reg = reg), sum(1:3 + 1))
    expect_equal(reduceResults(fun = function(job, aggr, res) c(aggr, job$id), init = integer(0), ids = 2:3, reg = reg), 2:3)
    expect_list(reduceResults(fun = function(job, aggr, res) c(aggr, list(job)), init = list(), ids = 2:3, reg = reg), types = "Job", len = 2)
    # The order of `ids` determines the order of aggregation.
    expect_equal(
      reduceResults(fun = function(aggr, res, ...) c(aggr, res$a), ids = 3:1, init = integer(0), reg = reg),
      rev(reduceResults(fun = function(aggr, res, ...) c(aggr, res$a), ids = 1:3, init = integer(0), reg = reg))
    )
    expect_error(reduceResults(fun = function(aggr, res, ...) c(aggr, res$a), ids = 1:4, init = integer(0), reg = reg), "successfully computed")
  })
})

# reduceResultsList(): plain results, transformed results, extra arguments,
# access to the job object and ordering by `ids`.
test_that("reduceResultsList", {
  silent({
    expect_equal(reduceResultsList(reg = reg), Map(fun, a = 1:3, b = 4:2))
    expect_equal(reduceResultsList(reg = reg, fun = function(x) x$a), as.list(1:3))
    expect_equal(reduceResultsList(reg = reg, fun = function(x, y) x$a + y, y = 1), as.list(1:3 + 1))
    expect_list(reduceResultsList(reg = reg, fun = function(job, ...) job), types = "Job", len = 3)
    expect_equal(reduceResultsList(ids = 2:1, reg = reg), rev(reduceResultsList(ids = 1:2, reg = reg)))
  })
})

# reduceResultsDataTable() followed by unwrap(): named list results become
# columns, unnamed scalar results end up in a column "result.1".
test_that("reduceResultsDataTable", {
  silent({
    tab = unwrap(reduceResultsDataTable(reg = reg))
    expect_data_table(tab, nrow = 3, ncol = 3, key = "job.id")
    expect_null(tab$result)
    expect_equal(tab$a, 1:3)

    tab = unwrap(reduceResultsDataTable(reg = reg, fun = function(x) list(a = x$a)))
    expect_data_table(tab, nrow = 3, ncol = 2, key = "job.id")
    expect_equal(tab$a, 1:3)

    # The table is keyed on job.id, so rows come back sorted even for ids = 3:2.
    tab = unwrap(reduceResultsDataTable(reg = reg, ids = 3:2, fun = function(x) list(a = x$a)))
    expect_data_table(tab, nrow = 2, ncol = 2, key = "job.id")
    expect_equal(tab$a, 2:3)

    tab = unwrap(reduceResultsDataTable(reg = reg, fun = function(x) x$a))
    expect_data_table(tab, nrow = 3, ncol = 2, key = "job.id")
    expect_equal(tab$result.1, 1:3)

    tab = unwrap(reduceResultsDataTable(reg = reg, fun = function(x, y) x$a + y, y = 1))
    expect_data_table(tab, nrow = 3, ncol = 2, key = "job.id")
    expect_equal(tab$result.1, 1:3 + 1L)
  })
})

# Without unwrap() the results are kept in a single list column "result".
test_that("reduceResultsDataTable/unwrap simple", {
  silent({
    tab = reduceResultsDataTable(reg = reg)
    expect_data_table(tab, nrow = 3, ncol = 2, key = "job.id")
    expect_set_equal(names(tab), c("job.id", "result"))
    expect_list(tab$result[[1]], types = "numeric", len = 2)
    tab = unwrap(tab)
    expect_data_table(tab, ncol = 3)
    expect_equal(tab$job.id, 1:3)
    expect_equal(tab$a, 1:3)
    expect_equal(tab$b, 4:2)
  })
})

# Second fixture: an experiment registry with one problem (instance is always 2)
# and one algorithm computing instance^sq for sq = 1:3.
suppressMessages({
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", fun = function(job, data, ...) 2, seed = 42)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, sq) instance^sq)
  ids = addExperiments(list(p1 = data.table()), list(a1 = data.table(sq = 1:3)), reg = reg)
  submitAndWait(reg = reg)
})

# reduceResults() on an experiment registry; job objects are of type "Experiment".
test_that("reduceResults/BatchExperiments", {
  silent({
    expect_equal(reduceResults(fun = function(aggr, res, ...) c(aggr, res), init = integer(0), reg = reg), 2^(1:3))
    expect_equal(reduceResults(ids = 2:3, fun = function(aggr, job, res, ...) c(aggr, job$id), init = integer(0), reg = reg), 2:3)
    expect_list(reduceResults(fun = function(job, aggr, res) c(aggr, list(job)), init = list(), ids = 2:3, reg = reg), types = "Experiment", len = 2)
  })
})

# reduceResultsList() on an experiment registry, including access to problem
# name, algorithm name and instance through the job object.
test_that("reduceResultsList/BatchExperiments", {
  silent({
    expect_equal(reduceResultsList(reg = reg), as.list(2^(1:3)))
    expect_equal(reduceResultsList(fun = function(job, ...) job$prob.name, reg = reg), as.list(rep("p1", 3)))
    expect_equal(reduceResultsList(fun = function(job, ...) job$algo.name, reg = reg), as.list(rep("a1", 3)))
    expect_equal(reduceResultsList(fun = function(job, ...) job$instance, reg = reg), as.list(rep(2, 3)))
  })
})

# Reducing a registry without any results returns `init` (NULL by default) or
# an empty list, both for an empty registry and for defined but unsubmitted jobs.
test_that("reduceResults with no results reg", {
  silent({
    reg = makeTestRegistry()
    expect_equal(reduceResults(fun = c, reg = reg), NULL)
    expect_equal(reduceResults(fun = c, reg = reg, init = 42), 42)
    expect_equal(reduceResultsList(reg = reg), list())

    fun = function(...) list(...)
    ids = batchMap(fun, a = 1:3, b = 3:1, reg = reg)
    expect_equal(reduceResults(fun = c, reg = reg), NULL)
    expect_equal(reduceResults(fun = c, reg = reg, init = 42), 42)
    expect_equal(reduceResultsList(reg = reg), list())
  })
})

# NULL results must be preserved as list elements and not be dropped.
test_that("reduceResultsList/NULL", {
  reg = makeTestRegistry()
  f = function(...) NULL
  ids = batchMap(f, 1:3, reg = reg)
  submitAndWait(ids, reg = reg)
  res = reduceResultsList(ids = ids, reg = reg)
  expect_equal(res, replicate(3, NULL, simplify = FALSE))
})

# Results which are data frames with several rows stay nested: one row per job.
test_that("reduceResultsDataTable/multiRowResults", {
  silent({
    reg = makeTestRegistry()
    fun = function(a) data.frame(y1 = rep(a, 3), y2 = rep(a/2, 3))
    ids = batchMap(fun, a = c(10, 100), reg = reg)
    submitAndWait(reg, ids)
    res = reduceResultsDataTable(reg = reg)
    expect_data_frame(res, ncol = 2, nrow = 2)
    expect_list(res$result, types = "data.frame", len = 2L)
  })
})

# Whole data frames as results are stored as unnamed elements of the list column.
test_that("reduceResultsDataTable/unwrap objects", {
  silent({
    reg = makeTestRegistry()
    fun = function(...) iris
    ids = batchMap(fun, i = 1:2, reg = reg)
    submitAndWait(reg, 1:2)
    tab = reduceResultsDataTable(reg = reg)
    expect_data_table(tab, nrow = 2, ncol = 2, key = "job.id")
    expect_set_equal(names(tab), c("job.id", "result"))
    expect_list(tab$result, types = "data.frame", names = "unnamed")
  })
})

================================================
FILE: tests/testthat/test_removeExperiments.R
================================================
# Removes experiments by id, by problem and by algorithm. Removing experiments
# must neither delete the problem/algorithm files nor unregister their names.
test_that("removeExperiments", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data) nrow(data), seed = 42)
  prob = addProblem(reg = reg, "p2", data = iris, fun = function(job, data) nrow(data), seed = 42)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, sq) instance^sq)
  algo = addAlgorithm(reg = reg, "a2", fun = function(job, data, instance, sq) instance^sq)
  ids = addExperiments(list(p1 = data.table(), p2 = data.table(x = 1:2)), list(a1 = data.table(sq = 1:3), a2 = data.table(sq = 1:2)), reg = reg)
  N = nrow(findExperiments(reg = reg))

  expect_data_table(removeExperiments(1, reg = reg), nrow = 1, key = "job.id")
  expect_equal(findExperiments(reg = reg)$job.id, 2:N)
  # Removing an already removed id is a no-op returning an empty table.
  expect_data_table(removeExperiments(1, reg = reg), nrow = 0, key = "job.id")
  expect_equal(findExperiments(reg = reg)$job.id, 2:N)

  ids = findExperiments(prob.name = "p1", reg = reg)
  expect_data_table(removeExperiments(ids, reg = reg), nrow = 4, key = "job.id")
  expect_equal(findExperiments(reg = reg)$job.id, 6:N)
  expect_true(fs::file_exists(getProblemURI(reg, "p1")))
  expect_set_equal(c("p1", "p2"), reg$problems)

  ids = findExperiments(algo.name = "a2", reg = reg)
  expect_data_table(removeExperiments(ids, reg = reg), nrow = 4, key = "job.id")
  expect_equal(findExperiments(reg = reg)$job.id, 6:(N-nrow(ids)))
  expect_true(fs::file_exists(getAlgorithmURI(reg, "a2")))
  expect_set_equal(c("a1", "a2"), reg$algorithms)
  checkTables(reg)
})

================================================
FILE: tests/testthat/test_removeRegistry.R
================================================
# removeRegistry() deletes the file directory and returns a string.
test_that("removeRegistry", {
  reg = makeTestRegistry()
  expect_directory_exists(reg$file.dir)
  res = removeRegistry(0.01, reg = reg)
  expect_string(res)
  expect_false(fs::dir_exists(reg$file.dir))
})

# Removing the registry which is currently the default one must unset the default.
test_that("removeRegistry resets default registry", {
  # Save and restore the previous default so other tests are not affected.
  prev = batchtools$default.registry
  reg = makeTestExperimentRegistry(make.default = TRUE)
  expect_is(batchtools$default.registry, "Registry")
  res = removeRegistry(0, reg = reg)
  expect_false(fs::dir_exists(reg$file.dir))
  expect_null(batchtools$default.registry)
  batchtools$default.registry = prev
})

================================================
FILE: tests/testthat/test_resetJobs.R
================================================
# resetJobs() must restore the status table to its pre-submit state and remove
# logs, results and external directories of exactly the jobs which were reset.
test_that("resetJobs", {
  reg = makeTestRegistry()
  # Job 2 fails before touching .job$external.dir; jobs 1 and 3 access it.
  f = function(x, .job) if (x == 2) stop(2) else .job$external.dir
  batchMap(f, 1:3, reg = reg)
  # Snapshot of the tables before submission, used as reference after resetting.
  before = list(
    status = copy(reg$status),
    defs = copy(reg$defs)
  )
  submitAndWait(reg, 1:3)

  expect_file_exists(getLogFiles(reg, 3))
  expect_false(identical(reg$status$submitted, before$status$submitted))
  expect_file_exists(getResultFiles(reg, 1))
  expect_equal(unname(fs::dir_exists(fs::path(reg$file.dir, "external", 1:3))), c(TRUE, FALSE, TRUE))

  # Reset only job 1: job 3 must keep its log, result and external directory.
  resetJobs(1, reg = reg)
  expect_true(all.equal(before$status[1], reg$status[1]))
  expect_false(fs::file_exists(getResultFiles(reg, 1)))
  expect_true(fs::file_exists(getResultFiles(reg, 3)))
  expect_file_exists(getLogFiles(reg, 3))
  expect_equal(unname(fs::dir_exists(fs::path(reg$file.dir, "external", 1:3))), c(FALSE, FALSE, TRUE))
  expect_false(fs::file_exists(getResultFiles(reg, 1)))
  expect_file_exists(getResultFiles(reg, 3))

  # Reset the remaining jobs: everything must be back to the initial state.
  resetJobs(2:3, reg = reg)
  expect_data_table(reg$status, key = "job.id")
  expect_data_table(reg$defs, key = "def.id")
  expect_equivalent(before$status, reg$status)
  expect_false(fs::file_exists(getLogFiles(reg, 3)))
  expect_false(fs::file_exists(getResultFiles(reg, 3)))
  expect_equal(unname(fs::dir_exists(fs::path(reg$file.dir, "external", 1:3))), c(FALSE, FALSE, FALSE))
})

# After a reset, log and result of the job are no longer accessible.
test_that("functions produce error after resetting jobs", {
  reg = makeTestRegistry()
  f = function(x, .job) if (x == 2) stop(2) else .job$external.dir
  batchMap(f, 1:3, reg = reg)
  submitAndWait(reg, 1:3)
  resetJobs(1, reg = reg)
  expect_error(getLog(1, reg = reg), "not available")
  expect_error(loadResult(1, reg = reg), "not terminated")
})

================================================
FILE: tests/testthat/test_runOSCommand.R
================================================
# runOSCommand() returns a named list with the command, its arguments, the exit
# code and the captured output.
test_that("runOSCommand", {
  skip_on_os(c("windows", "solaris")) # system2 is broken on solaris
  x = runOSCommand("ls", find.package("batchtools"))
  expect_list(x, names = "named", len = 4L)
  expect_names(names(x), permutation.of = c("sys.cmd", "sys.args", "exit.code", "output"))
  expect_identical(x$exit.code, 0L)
  expect_true(all(c("DESCRIPTION", "NAMESPACE", "NEWS.md") %chin% x$output))
})

# An unknown command yields exit code 127 and the output "command not found";
# OSError() includes the message, the command and the exit code.
test_that("command not found", {
  skip_on_os("solaris") # system2 is broken on solaris
  res = runOSCommand("notfoundcommand")
  expect_list(res, len = 4)
  expect_identical(res$exit.code, 127L)
  expect_identical(res$output, "command not found")
  expect_error(OSError("Command not found", res), pattern = "Command not found")
  expect_error(OSError("Command not found", res), pattern = "'notfoundcommand'")
  expect_error(OSError("Command not found", res), pattern = "exit code 127")
})

# A file passed via `stdin` is fed to the command: `cat` must echo it unchanged.
test_that("stdin", {
  skip_on_os(c("windows", "solaris")) # system2 is broken on solaris
  tf = fs::file_temp()
  # Single source of truth for the fixture: written to the file and compared
  # against the output (the variable was previously assigned but never used).
  lines = letters
  writeLines(lines, con = tf)
  res = runOSCommand("cat", stdin = tf)
  expect_identical(res$exit.code, 0L)
  expect_identical(res$output, lines)
  fs::file_delete(tf)
})

================================================
FILE: tests/testthat/test_seed.R
================================================
# withr::with_seed() must produce the draws of the given seed without
# disturbing the surrounding RNG stream.
test_that("with_seed", {
  # Reference draws without with_seed().
  set.seed(1)
  x.1 = runif(5)
  set.seed(42)
  x.42 = runif(5)
  x.next = runif(5)

  # Same sequence, but the seed-1 draws are wrapped in with_seed().
  set.seed(42)
  y.1 = withr::with_seed(1, runif(5))
  y.42 = runif(5)
  y.next = runif(5)

  expect_identical(x.1, y.1)
  expect_identical(x.42, y.42)
  expect_identical(x.next, y.next)
  # NOTE(review): `state` is not defined in this test, so the error presumably
  # stems from the unknown object -- confirm the original intent.
  expect_error(withr::with_seed(1, print(state)))
})

# Seeding in experiment registries: problem "p1" has its own seed (1), problem
# "p2" has none; the registry seed is 42. The expected values below are
# seed + replication - 1 for p1 and registry seed + job id for the algorithms.
test_that("Problem and Algorithm seed", {
  reg = makeTestExperimentRegistry(seed = 42)
  addProblem(reg = reg, "p1", data = iris, fun = function(job, data, ...) runif(1), seed = 1L)
  addProblem(reg = reg, "p2", data = iris, fun = function(job, data, ...) runif(1))
  addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) list(instance = instance, res = runif(1)))
  addAlgorithm(reg = reg, "a2", fun = function(job, data, instance, ...) list(instance = instance, res = runif(1)))
  prob.designs = list(p1 = data.table(), p2 = data.table())
  algo.designs = list(a1 = data.table(), a2 = data.table())
  repls = 3
  ids = addExperiments(prob.designs, algo.designs, repls = repls, reg = reg)

  submitAndWait(reg, ids)

  # Reference draws for the expected problem seeds (1, 2, 3) and algorithm seeds (43, 44, 45).
  set.seed(1); p1 = runif(1)
  set.seed(2); p2 = runif(1)
  set.seed(3); p3 = runif(1)
  set.seed(43); a1 = runif(1)
  set.seed(44); a2 = runif(1)
  set.seed(45); a3 = runif(1)

  silent({
    ids = findExperiments(algo.name = "a1", prob.name = "p1", reg = reg)
    results = rbindlist(reduceResultsList(ids, reg = reg), use.names = TRUE)
  })
  expect_true(all(results$instance == c(p1, p2, p3)))
  expect_true(all(results$res == c(a1, a2, a3)))

  # The instance of a replication is the same for all algorithms.
  silent({
    ids = findExperiments(prob.name = "p1", repl = 2, reg = reg)
    results = rbindlist(reduceResultsList(ids, reg = reg), use.names = TRUE)
  })
  expect_true(all(results$instance == p2))

  # Without a problem seed all instances and results are distinct.
  silent({
    ids = findExperiments(prob.name = "p2", reg = reg)
    results = rbindlist(reduceResultsList(ids, reg = reg), use.names = TRUE)
  })
  expect_numeric(results$instance, unique = TRUE)
  expect_numeric(results$res, unique = TRUE)
})

# Regression test for issue #203: the seed exposed through the job object and
# the seed written to the log must match (registry seed + job id).
test_that("Seed is correctly reported (#203)", {
  reg = makeTestRegistry(seed = 1)
  batchMap(function(x, .job) list(seed = .job$seed), x = 1:3, reg = reg)
  submitAndWait(reg)
  res = unwrap(reduceResultsDataTable(reg = reg))
  expect_data_table(res, nrow = 3, ncol = 2)
  expect_identical(res$seed, 2:4)
  expect_true(any(stri_detect_fixed(getLog(1, reg = reg), "Setting seed to 2")))
  expect_true(any(stri_detect_fixed(getLog(2, reg = reg), "Setting seed to 3")))
  expect_true(any(stri_detect_fixed(getLog(3, reg = reg), "Setting seed to 4")))

  # Same check for an experiment registry; job$seed is read both in the problem
  # function (returned as instance) and in the algorithm function.
  reg = makeTestExperimentRegistry(seed = 1)
  addProblem(reg = reg, "p1", fun = function(job, ...) job$seed, seed = 100L)
  addAlgorithm(reg = reg, "a1", fun = function(job, instance, ...) list(instance = instance, seed = job$seed))
  ids = addExperiments(repls = 2, reg = reg)
  getStatus(reg = reg)
  submitAndWait(reg)
  res = unwrap(reduceResultsDataTable(reg = reg))
  expect_data_table(res, nrow = 2, ncol = 3)
  expect_identical(res$instance, 2:3)
  expect_identical(res$seed, 2:3)
  expect_true(any(stri_detect_fixed(getLog(1, reg = reg), "seed = 2")))
  expect_true(any(stri_detect_fixed(getLog(2, reg = reg), "seed = 3")))
})

================================================
FILE: tests/testthat/test_showLog.R
================================================
# Logs are unavailable before submission; afterwards getLog() returns only the
# lines of the requested job and showLog() hands the log file to the pager.
test_that("showLog/getLog", {
  reg = makeTestRegistry()
  batchMap(function(x) print("GREPME"), 1:2, reg = reg)
  expect_error(showLog(id = 1, reg = reg), "not available")
  expect_error(readLog(id = data.table(job.id = 1L), reg = reg), "not available")
  submitAndWait(reg)
  lines = getLog(id = 1, reg = reg)
  expect_character(lines, min.len = 3L, any.missing = FALSE)
  expect_equal(sum(stri_detect_fixed(lines, "GREPME")), 1L)
  expect_true(any(stri_startswith_fixed(lines, "### [bt")))
  # Exactly two lines end with the marker of job 1, none with the marker of job 2.
  expect_identical(sum(stri_endswith_fixed(lines, "[batchtools job.id=1]")), 2L)
  expect_false(any(stri_endswith_fixed(lines, "[batchtools job.id=2]")))

  lines = getLog(id = 2, reg = reg)
  expect_false(any(stri_endswith_fixed(lines, "[batchtools job.id=1]")))

  # Replace the pager by a function returning the file path, so that the file
  # passed to the pager can be inspected.
  withr::with_options(list(pager = function(files, header, title, delete.file) files), {
    x = showLog(id = 2, reg = reg)
    expect_equal(fs::path_file(x), "2.log")
    expect_equal(sum(stri_detect_fixed(readLines(x), "GREPME")), 1L)
  })

  # getLog() requires exactly one existing job id.
  expect_error(getLog(id = 1:2, reg = reg), "exactly")
  expect_error(getLog(id = 3, reg = reg), "exactly")
})

# An empty log file yields an empty table / character vector instead of an error.
test_that("empty log files", {
  reg = makeTestRegistry()
  batchMap(identity, 1, reg = reg)
  submitAndWait(reg)

  # overwrite log file
  log.file = getLogFiles(reg, 1)
  fs::file_delete(log.file)
  fs::file_create(log.file)

  x = readLog(data.table(job.id = 1), reg = reg)
  expect_data_table(x, ncol = 2, nrow = 0, index = "job.id")
  expect_equal(getLog(1, reg = reg), character(0L))
})
================================================
FILE: tests/testthat/test_sleep.R
================================================
# getSleepFunction() must return a function calling Sys.sleep() regardless of
# whether the sleep argument is NULL, a number or a function.
test_that("getSleepFunction", {
  reg = makeTestRegistry()
  f = getSleepFunction(reg, NULL)
  expect_function(f)
  expect_true(any(grepl("Sys.sleep", as.character(body(f)))))

  f = getSleepFunction(reg, 99)
  expect_function(f)
  expect_true(any(grepl("Sys.sleep", as.character(body(f)))))

  f = getSleepFunction(reg, function(x) x^2)
  expect_function(f)
  expect_true(any(grepl("Sys.sleep", as.character(body(f)))))
})

================================================
FILE: tests/testthat/test_submitJobs.R
================================================
# Submits jobs in two rounds with different resources and checks the status
# and resources tables of the registry.
test_that("submitJobs", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, i = 1:3, reg = reg)

  submitAndWait(reg, 1:2, resources = list(foo = "bar"))
  checkTables(reg)

  # Jobs 1 and 2 are submitted, job 3 is still untouched.
  expect_integer(reg$status[1:2, resource.id], any.missing = FALSE)
  expect_character(reg$status[1:2, batch.id], any.missing = FALSE)
  expect_numeric(reg$status[1:2, submitted], any.missing = FALSE)
  expect_true(is.na(reg$status[3, submitted]))

  # The stored resources are the default resources with the user resources inserted.
  x = reg$resources[1, resources][[1L]]
  y = insert(reg$default.resources, list(foo = "bar"))
  # NOTE(review): chunks.as.arrayjobs is dropped from the expectation if the
  # cluster functions have no array variable -- presumably submitJobs() removes
  # it in that case; confirm.
  if (isTRUE(y$chunks.as.arrayjobs) && is.na(reg$cluster.functions$array.var))
    y$chunks.as.arrayjobs = NULL
  expect_equal(x[order(names2(x))], y[order(names2(y))])

  submitAndWait(reg, 3, resources = list(walltime = 100, memory = 500))
  res = reg$resources[2, resources][[1L]]
  expect_equal(res$walltime, 100)
  expect_equal(res$memory, 500)

  # should be 2 chunks?
  expect_equal(uniqueN(reg$status$job.hash), 2)
})

# Resources can be given per job as extra columns of the ids table; different
# resources within the same chunk are rejected.
test_that("per job resources", {
  reg = makeTestRegistry()
  fun = function(...) list(...)
  ids = batchMap(fun, i = 1:3, reg = reg)
  ids$walltime = as.integer(c(180, 120, 180))
  ids$chunk = 1:3
  submitAndWait(reg, ids = ids)
  # Two distinct walltimes result in two distinct resource sets.
  res = reg$resources
  expect_data_table(res, nrow = 2)
  expect_equal(uniqueN(res, by = "resource.hash"), 2L)
  expect_set_equal(rbindlist(res$resources)$walltime, c(120L, 180L))

  ids$chunk = 1L
  expect_error(submitJobs(ids, reg = reg), "per-job")
})

================================================
FILE: tests/testthat/test_summarizeExperiments.R
================================================
# summarizeExperiments() counts experiments per problem/algorithm by default
# and per arbitrary parameter combination via `by`.
test_that("summarizeExperiments", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data) nrow(data), seed = 42)
  prob = addProblem(reg = reg, "p2", data = iris, fun = function(job, data) nrow(data), seed = 42)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, sq) instance^sq)
  ids = addExperiments(list(p1 = data.table(), p2 = data.table(x = 1:2)), list(a1 = data.table(sq = 1:3)), reg = reg)

  s = summarizeExperiments(reg = reg)
  expect_data_table(s, nrows = 2, ncols = 3)
  expect_equal(s$.count, c(3, 6))
  expect_equal(s$problem, c("p1", "p2"))
  expect_equal(s$algorithm, c("a1", "a1"))

  # Grouping by problem parameter x: p1 has no x (NA), p2 has x = 1 and x = 2.
  s = summarizeExperiments(reg = reg, by = c("problem", "algorithm", "x"))
  expect_data_table(s, nrows = 3, ncols = 4)
  expect_equal(s$.count, c(3, 3, 3))
  expect_equal(s$x, c(NA, 1, 2))
})

================================================
FILE: tests/testthat/test_sweepRegistry.R
================================================
# sweepRegistry() must remove obsolete resources, logs and job files; removing
# experiments must also remove their tags.
test_that("sweepRegistry", {
  reg = makeTestRegistry()
  array.jobs = isTRUE(reg$default.resources$chunks.as.arrayjobs)
  batchMap(identity, 1, reg = reg)

  # Submit the same job twice with different resources and add a stray job
  # collection file, so that there is something to sweep.
  submitAndWait(reg, 1, resources = list(foo = 1))
  submitAndWait(reg, 1, resources = list(foo = 2))
  writeRDS(makeJobCollection(1, reg = reg), fs::path(reg$file.dir, "jobs", "test.rds"))

  # The expected number of files before sweeping depends on array job and debug settings.
  expect_data_table(reg$resources, nrow = 2)
  expect_character(list.files(dir(reg, "logs")), len = 2L)
  expect_character(list.files(fs::path(reg$file.dir, "jobs"), pattern = "\\.rds$"), len = 1L + (array.jobs && reg$cluster.functions$store.job.collection) * 2L)
  expect_character(list.files(fs::path(reg$file.dir, "jobs"), pattern = "\\.job$"), len = (batchtools$debug && array.jobs) * 2L)

  expect_true(sweepRegistry(reg))

  expect_data_table(reg$resources, nrow = 1)
  expect_character(list.files(dir(reg, "logs")), len = 1L)
  if (reg$cluster.functions$store.job.collection)
    expect_character(list.files(fs::path(reg$file.dir, "jobs")), len = 0L)
  checkTables(reg)

  # Tags of removed experiments must not linger in the tags table.
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data, ...) nrow(data))
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, ...) NULL)
  addExperiments(prob.designs = list(p1 = data.table(i = 1:10)), reg = reg)
  addJobTags(6:10, "foo", reg = reg)
  expect_data_table(reg$tags, nrow = 5, any.missing = FALSE)
  removeExperiments(ids = 6:10, reg = reg)
  expect_data_table(reg$tags, nrow = 0)
  checkTables(reg)
})

# Regression test for issue #113: a registry created with a "~"-relative file
# directory must remain fully usable after experiments have been removed.
test_that("relative paths work (#113)", {
  skip_on_cran()
  skip_on_os("windows")
  skip_if_not(is.null(getSysConf()$temp.dir)) # we are probably on a system where home is not shared
  fd = sprintf("~/batchtools-test-%s", fs::path_file(fs::file_temp("")))
  reg = makeTestExperimentRegistry(file.dir = fd)
  problems = list("a", "b")
  # Registers one problem per name; the value of the last statement
  # (a design with three folds) becomes the problem design.
  pdes = lapply(problems, function(p) {
    addProblem(name = p, data = p, fun = function(...) list(...), reg = reg)
    res = data.frame(fold = 1:3)
  })
  names(pdes) = problems
  algo.rep1 = function(job, data, instance, x) {
    rep(paste(data, x), instance$fold)
  }
  algo.rep2 = function(job, data, instance, x) {
    rep(paste(data, x), instance$fold)
  }
  addAlgorithm(name = "rep1", fun = algo.rep1, reg = reg)
  addAlgorithm(name = "rep2", fun = algo.rep2, reg = reg)
  ades = list(
    rep1 = data.table(x = LETTERS[1:3]),
    rep2 = data.table(x = letters[1:3])
  )
  addExperiments(pdes, ades, reg = reg)
  submitAndWait(reg = reg)
  ids.rep1 = findExperiments(algo.name = "rep1", reg = reg)
  ids.rep2 = findExperiments(algo.name = "rep2", reg = reg)

  # After removing all "rep2" experiments, logs and results of "rep1"
  # (2 problems x 3 folds x 3 values of x = 18 jobs) must still be readable.
  removeExperiments(ids.rep2, reg = reg)
  expect_character(getLog(ids.rep1[1], reg = reg), min.len = 1, any.missing = FALSE)
  expect_list(reduceResultsList(ids = ids.rep1, reg = reg), len = 18)
  checkTables(reg)
  fs::dir_delete(fs::path_expand(fd))
})

================================================
FILE: tests/testthat/test_tags.R
================================================
# Adding, querying and removing job tags.
test_that("tags work", {
  reg = makeTestRegistry()
  batchMap(identity, 1:10, reg = reg)

  # No tags yet: all queries return empty results.
  expect_equal(getUsedJobTags(reg = reg), character())
  expect_data_table(findTagged(tag = "foo", reg = reg), nrow = 0, ncol = 1)
  expect_data_table(removeJobTags(reg = reg, tags = "foo"), nrow = 0, ncol = 1)

  # Jobs 3 and 4 get both tags.
  expect_data_table(addJobTags(1:4, "walltime", reg = reg), nrow = 4, key = "job.id")
  expect_data_table(addJobTags(3:7, "broken", reg = reg), nrow = 5, key = "job.id")
  expect_set_equal(getUsedJobTags(reg = reg), c("walltime", "broken"))
  expect_set_equal(getUsedJobTags(1:2, reg = reg), c("walltime"))

  # Without ids, tags are added to / removed from all 10 jobs.
  addJobTags(tags = c("foo", "bar"), reg = reg)
  x = getJobTags(reg = reg)
  expect_true(all(stri_detect_fixed(x$tags, "foo")))
  expect_true(all(stri_detect_fixed(x$tags, "bar")))

  x = removeJobTags(tags = c("foo", "bar"), reg = reg)
  expect_data_table(x, ncol = 1, nrow = 10, key = "job.id")
  x = getJobTags(reg = reg)
  expect_false(any(stri_detect_fixed(x$tags, "foo"), na.rm = TRUE))
  expect_false(any(stri_detect_fixed(x$tags, "bar"), na.rm = TRUE))

  x = getJobTags(reg = reg)
  expect_data_table(x, nrow = 10, ncol = 2, key = "job.id")
  expect_character(x$tags, min.len = 1L)

  x = findTagged(tags = "broken", reg = reg)
  expect_data_table(x, nrow = 5, ncol = 1, key = "job.id")
  expect_equal(x$job.id, 3:7)

  x = findTagged(tags = "whoops", reg = reg)
  expect_data_table(x, nrow = 0, ncol = 1, key = "job.id")

  # Only jobs which actually carried the tag (3 and 4 out of 9:3) are returned.
  x = removeJobTags(9:3, "walltime", reg = reg)
  expect_data_table(x, ncol = 1, nrow = 2, key = "job.id")
  expect_equal(x$job.id, 3:4)

  x = getJobTags(reg = reg)
  expect_equal(x$tags, c(rep("walltime", 2), rep("broken", 5), rep(NA_character_, 3)))

  checkTables(reg)
})

================================================
FILE: tests/testthat/test_testJob.R
================================================
# testJob() runs a job interactively (or in an external session) and must not
# alter the status tables of the registry.
test_that("testJob", {
  reg = makeTestRegistry()
  # Fails for even inputs.
  f = function(x) if (x %% 2 == 0) stop("foo") else x^2
  batchMap(reg = reg, f, 1:3)
  expect_equal(testJob(reg = reg, id = 1), 1)
  expect_equal(testJob(reg = reg, id = 3), 9)
  expect_error(testJob(reg = reg, id = 2), "foo")

  expect_equal(suppressAll(testJob(reg = reg, id = 1, external = TRUE)), 1)
  expect_error(suppressAll(testJob(reg = reg, id = 2, external = TRUE)), "re-run")

  # Nothing may be recorded as submitted, done or failed.
  expect_equal(findSubmitted(reg = reg), data.table(job.id = integer(0L), key = "job.id"))
  expect_equal(findDone(reg = reg), data.table(job.id = integer(0L), key = "job.id"))
  expect_equal(findErrors(reg = reg), data.table(job.id = integer(0L), key = "job.id"))
})

# testJob() on an experiment registry: the instance is nrow(iris) = 150.
test_that("testJob.ExperimentRegistry", {
  reg = makeTestExperimentRegistry()
  prob = addProblem(reg = reg, "p1", data = iris, fun = function(job, data, ...) nrow(data), seed = 42)
  algo = addAlgorithm(reg = reg, "a1", fun = function(job, data, instance, sq, ...) instance^sq)
  ids = addExperiments(prob.designs = list(p1 = data.table()), algo.designs = list(a1 = data.table(sq = 1:3)), reg = reg)

  suppressAll(x <- testJob(id = 1, reg = reg))
  expect_equal(x, 150)
  suppressAll(x <- testJob(id = 2, reg = reg, external = TRUE))
  expect_equal(x, 150^2)
})

# On error in an external session, the printed output must contain the
# traceback (here: the intermediate function "findme").
test_that("traceback works in external session", {
  reg = makeTestRegistry()
  f = function(x) {
    g = function(x) findme(x)
    findme = function(x) h(x)
    h = function(x) stop("Error in h")
    g(x)
  }
  batchMap(f, 1, reg = reg)
  expect_output(expect_error(testJob(1, external = TRUE, reg = reg), "external=FALSE"), "findme")
})

================================================
FILE: tests/testthat/test_unwrap.R
================================================
# unwrap() expands list columns into regular columns; `sep` controls whether
# the new columns are prefixed with the name of the original column.
test_that("unwrap behaves", {
  x = data.table(
    id = 1:3,
    nested.list = list(list(a = 1), list(a = 2), list(a = 33)),
    nested.2dlist = list(list(a = 1, b = 2), list(a = 1), list(b = 2)),
    nested.df = list(data.frame(a = 1, b = 2), data.frame(a = 1), data.frame(b = 2)),
    multi.row = list(data.frame(a = 1:2, b = 1:2), data.frame(a = 3:4, b = 3:4), data.frame(a = 1:2, b = 3:4)),
    empty = list(NULL, NULL, NULL)
  )

  # Unwrapped columns are appended at the end; without `sep` the inner names are used.
  cols = "nested.list"
  res = unwrap(x, cols)
  expect_data_table(res, nrow = nrow(x), ncol = ncol(x), col.names = "unique", any.missing = FALSE)
  expect_equal(names(res), c("id", "nested.2dlist", "nested.df", "multi.row", "empty", "a"))
  expect_numeric(res[["a"]])

  cols = "nested.list"
  res = unwrap(x, cols, sep = ".")
  expect_data_table(res, nrow = nrow(x), ncol = ncol(x), col.names = "unique", any.missing = FALSE)
  expect_equal(names(res), c("id", "nested.2dlist", "nested.df", "multi.row", "empty", "nested.list.a"))
  expect_numeric(res[["nested.list.a"]])

  # Lists with differing element names are filled up with missing values.
  cols = "nested.2dlist"
  res = unwrap(x, cols)
  expect_data_table(res, nrow = nrow(x), ncol = ncol(x) + 1L, col.names = "unique", any.missing = TRUE)
  expect_equal(names(res), c("id", "nested.list", "nested.df", "multi.row", "empty", "a", "b"))
  expect_numeric(res[["a"]])
  expect_numeric(res[["b"]])

  cols = "nested.2dlist"
  res = unwrap(x, cols, sep = "_")
  expect_data_table(res, nrow = nrow(x), ncol = ncol(x) + 1L, col.names = "unique", any.missing = TRUE)
  expect_set_equal(names(res), c("id", "nested.list", "nested.2dlist_a", "nested.2dlist_b", "nested.df", "multi.row", "empty"))
  expect_numeric(res[["nested.2dlist_a"]])
  expect_numeric(res[["nested.2dlist_b"]])

  # One-row data frames are handled like named lists.
  cols = "nested.df"
  res = unwrap(x, cols, sep = "_")
  expect_data_table(res, nrow = nrow(x), ncol = ncol(x) + 1L, col.names = "unique", any.missing = TRUE)
  expect_set_equal(names(res), c("id", "nested.list", "nested.2dlist", "nested.df_a", "nested.df_b", "multi.row", "empty"))
  expect_numeric(res[["nested.df_a"]])
  expect_numeric(res[["nested.df_b"]])

  # A column holding only NULLs is dropped.
  cols = "empty"
  res = unwrap(x, cols)
  expect_data_table(res, nrow = nrow(x), ncol = ncol(x) - 1L, col.names = "unique", any.missing = TRUE)
  expect_equal(names(res), c("id", "nested.list", "nested.2dlist", "nested.df", "multi.row"))

  # Unwrapping all columns at once leads to duplicated names ("a", "b").
  expect_error(unwrap(x), "Name clash")

  # Unnamed list elements get the column name plus a running index.
  x = data.table(x = list(2, 3, 5), y = 1:3)
  res = unwrap(x)
  expect_data_table(res, nrow = 3, ncol = 2, col.names = "unique", any.missing = FALSE)
  expect_set_equal(names(res), c("y", "x.1"))
})

================================================
FILE: tests/testthat/test_waitForJobs.R
================================================
# waitForJobs() returns TRUE if all jobs terminated successfully and FALSE if
# a job errored (job 2 fails here).
test_that("waitForJobs", {
  reg = makeTestRegistry()
  fun = function(x) if (x == 2) stop(x) else x
  ids = batchMap(reg = reg, fun, 1:2)
  silent({
    submitJobs(ids, reg = reg)
    expect_true(waitForJobs(ids = ids[1], reg = reg, sleep = 1))
    expect_false(waitForJobs(ids = ids, stop.on.error = TRUE, sleep = 1, expire.after = 3, reg = reg))
  })
})

# Jobs killed behind the back of batchtools must be detected as expired.
test_that("waitForJobs: detection of expired jobs", {
  reg = makeTestRegistry()
  if (is.null(reg$cluster.functions$killJob))
    skip("Test requires killJobs")
  ids = batchMap(reg = reg, Sys.sleep, c(20, 20))
  ids$chunk = 1L
  silent({
    submitJobs(ids, reg = reg)
    # Kill directly via the cluster functions so the registry is not informed.
    batch.ids = reg$status$batch.id
    reg$cluster.functions$killJob(reg, batch.ids[1])
    expect_warning(waitForJobs(ids, reg = reg, sleep = 1, stop.on.expire = TRUE), "disappeared")
  })
})

# Jobs which have never been submitted are ignored with a warning.
test_that("waitForJobs: filter out unsubmitted jobs", {
  reg = makeTestRegistry()
  ids = batchMap(identity, 1:2, reg = reg)
  silent({
    submitJobs(ids = 1, reg = reg)
    expect_warning(res <- waitForJobs(ids = ids, reg = reg, sleep = 1), "unsubmitted")
    expect_true(res)
  })
})

================================================
FILE: tests/testthat.R
================================================
# setting R_TESTS to empty string because of
# https://github.com/hadley/testthat/issues/144
# revert this when that issue in R is fixed.
Sys.setenv("R_TESTS" = "")

# Entry point for R CMD check: runs all tests of the package.
library(testthat)
library(batchtools)
test_check("batchtools")

================================================ FILE: vignettes/batchtools.Rmd ================================================ --- title: "batchtools" output: html_document: toc: true urlcolor: blue linkcolor: blue vignette: > %\VignetteIndexEntry{batchtools} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r,include = FALSE, cache = FALSE} library(batchtools) library(data.table) # for %chin%, data.table options(batchtools.progress = FALSE, datatable.print.class = TRUE, batchtools.timestamps = FALSE) if (identical(Sys.getenv("IN_PKGDOWN"), "true")) { tmp_dir = fs::path(dirname(tempdir()), "batchtools-vignette") if (fs::dir_exists(tmp_dir)) fs::dir_delete(tmp_dir) fs::file_temp_push(fs::path(tmp_dir, letters)) } ``` # Setup ## Cluster Functions The communication with the batch system is managed via so-called cluster functions. They are created with the constructor `makeClusterFunctions()` which defines how jobs are submitted on your system. Furthermore, you may provide functions to list queued/running jobs and to kill jobs.
Usually you do not have to start from scratch but can just use one of the cluster functions which ship with the package: * Interactive Cluster Functions (default): `makeClusterFunctionsInteractive()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsInteractive.R) * Multicore Cluster Functions: `makeClusterFunctionsMulticore()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsMulticore.R) * Socket Cluster Functions: `makeClusterFunctionsSocket()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsSocket.R) * Makeshift SSH cluster: `makeClusterFunctionsSSH()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsSSH.R) * Docker Swarm: `makeClusterFunctionsDocker()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsDocker.R) * IBM Spectrum Load Sharing Facility (LSF): `makeClusterFunctionsLSF()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsLSF.R) * OpenLava: `makeClusterFunctionsOpenLava()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsOpenLava.R) * Univa Grid Engine / Oracle Grid Engine (OGE) / Sun Grid Engine (SGE): `makeClusterFunctionsSGE()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsSGE.R) * Slurm: `makeClusterFunctionsSlurm()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsSlurm.R) * TORQUE/OpenPBS: `makeClusterFunctionsTORQUE()`, [implementation](https://github.com/mlr-org/batchtools/blob/master/R/clusterFunctionsTORQUE.R) To use the package with the socket cluster functions, you would call the respective constructor `makeClusterFunctionsSocket()`: ```{r, message=FALSE} reg = makeRegistry(NA) reg$cluster.functions = makeClusterFunctionsSocket(2) ``` To make this selection permanent for this registry, save the `?Registry` with 
`saveRegistry()`. To make your cluster function selection permanent for a specific system across R sessions for all new Registries, you can set up a configuration file (see below). If you have trouble debugging your cluster functions, you can enable the debug mode for extra output. To do so, install the [debugme package](https://cran.r-project.org/package=debugme) and set the environment variable `DEBUGME` to `batchtools` before you load the `batchtools` package: ```{r,eval=FALSE} Sys.setenv(DEBUGME = "batchtools") library(batchtools) ``` ## Template Files Many cluster functions require a template file as argument. These templates are used to communicate with the scheduler and contain placeholders to evaluate arbitrary R expressions. Internally, the [brew package](https://cran.r-project.org/package=brew) is used for this purpose. Some exemplary template files can be found [here](https://github.com/mlr-org/batchtools/tree/master/inst/templates). It would be great if you would help expand this collection to cover more exotic configurations. To do so, open a new pull request. Note that all variables defined in a `?JobCollection` can be used inside the template. If you need to pass extra variables, you can set them via the argument `resources` of `submitJobs()`. If the flexibility which comes with templating is not sufficient, you can still construct a custom cluster function implementation yourself using the provided `makeClusterFunctions()`. ## Configuration File The configuration file can be used to set system specific options. Its default location depends on the operating system (see `makeRegistry()`), but for the first time setup you can put one in the current working directory (as reported by `getwd()`). In order to set the cluster function implementation, you would generate a file with the following content: ```{r,eval = FALSE} cluster.functions = makeClusterFunctionsInteractive() ``` The configuration file is parsed whenever you create or load a `?Registry`. 
It is sourced inside of your registry which has the advantage that you can (a) access all of the parameters which are passed to `makeRegistry()` and (b) you can also directly change them. Let's say you always want your working directory in your home directory and you always want to load the `checkmate` package on the nodes, you can just append these lines: ```{r, eval = FALSE} work.dir = "~" packages = union(packages, "checkmate") ``` See the documentation on `?Registry` for a more complete list of supported configuration options. # Migration from `BatchJobs`/`BatchExperiments` The development of [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) is discontinued for the following reasons: * Maintainability: The packages [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) are tightly connected which makes maintaining difficult. Changes have to be synchronized and tested against the current CRAN versions for compatibility. Furthermore, BatchExperiments violates CRAN policies by calling internal functions of BatchJobs. * Data base issues: Although we invested weeks to mitigate issues with locks of the SQLite data base or file system (staged queries, file system timeouts, ...), BatchJobs kept working unreliably on some systems with high latency or specific file systems. This made BatchJobs unusable for many users. [BatchJobs](https://github.com/tudo-r/BatchJobs/) and [BatchExperiments](https://github.com/tudo-r/Batchexperiments) will remain on CRAN or Github, but new features are unlikely to be ported back. ## Internal Changes * batchtools does not use SQLite anymore. Instead, all the information is stored directly in the registry using [data.tables](https://cran.r-project.org/package=data.table) acting as an in-memory database. As a side effect, many operations are much faster. * Nodes do not have to access the registry.
`submitJobs()` stores a temporary object of type `?JobCollection` on the file system which holds all the information necessary to execute a chunk of jobs via `doJobCollection()` on the node. This avoids file system locks because each job accesses only one file exclusively. * `ClusterFunctionsMulticore` now uses the parallel package for multicore execution. * `ClusterFunctionsSSH` can still be used to emulate a scheduler-like system which respects the work load on the local machine. Setting the hostname to `"localhost"` just strips out `ssh` of the command issued. ## Interface Changes * batchtools remembers the last created or loaded Registry and sets it as default registry. This way, you do not need to pass the registry around anymore. If you need to work with multiple registries simultaneously on the other hand, you can still do so by explicitly passing registries to the functions. * Most functions now return a [data.table](https://cran.r-project.org/package=data.table) which is keyed with the `job.id`. This way, return values can be joined together easily and efficiently (see `?JoinTables` for some examples). * The building blocks of a problem have been renamed from `static` and `dynamic` to the more intuitive `data` and `fun`. Thus, algorithm functions should have the formal arguments `job`, `data` and `instance`. * The function `makeDesign` has been removed. Parameters can be defined by just passing a `data.frame` or `data.table` to `addExperiments()`. For exhaustive designs, use `data.table::CJ()`. ## Template changes * The scheduler should directly execute the command: ``` Rscript -e 'batchtools::doJobCollection()' ``` There is no intermediate R source file like there was in `BatchJobs`. * All information stored in the object `?JobCollection` can be accessed while brewing the template. * Extra variables may be passed via the argument `resources` of `submitJobs()`. ## New features * Support for Docker Swarm via `?ClusterFunctionsDocker`. * Jobs can now be tagged and untagged to provide an easy way to group them.
* Some resources like the number of CPUs are now optionally passed to [parallelMap](https://cran.r-project.org/package=parallelMap). This eases nested parallelization, e.g. to use multicore parallelization on the slave by just setting a resource on the master. See `submitJobs()` for an example. * `?ClusterFunctions` are now more flexible in general as they can define hook functions which will be called at certain events. `?ClusterFunctionsDocker` is an example use case which implements a housekeeping routine. This routine is called every time before a job is about to get submitted to the scheduler (in this case: the Docker Swarm) via the hook `pre.submit` and every time directly after the registry synchronized jobs stored on the file system via the hook `post.sync`. ## Porting to `batchtools` The following table assists in porting to batchtools by mapping BatchJobs/BatchExperiments functions to their counterparts in batchtools. The table does not cover (a) functions which are used only internally in BatchJobs and (b) functions which have not been renamed.
| BatchJobs | batchtools | | ------------- | :-------------: | | `addRegistryPackages` | Set `reg$packages` or `reg$namespaces`, call `saveRegistry()` | | `addRegistrySourceDirs` | - | | `addRegistrySourceFiles` | Set `reg$source`, call `saveRegistry()` | | `batchExpandGrid` | `batchMap(..., args = CJ(x = 1:3, y = 1:10))` | | `batchMapQuick` | `btmapply()` | | `batchReduceResults` | - | | `batchUnexport` | `batchExport()` | | `filterResults` | - | | `getJobIds` | `findJobs()` | | `getJobInfo` | `getJobStatus()` | | `getJob` | `makeJob()` | | `getJobParamDf` | `getJobPars()` | | `loadResults` | `reduceResultsList()` | | `reduceResultsDataFrame` | `reduceResultsDataTable()` | | `reduceResultsMatrix` | `reduceResultsList()` + `do.call(rbind, res)` | | `reduceResultsVector` | `reduceResultsDataTable()` | | `setJobFunction` | - | | `setJobNames` | - | | `showStatus` | `getStatus()` | # Example 1: Approximation of $\pi$ To get a first insight into the usage of `batchtools`, we start with an exemplary Monte Carlo simulation to approximate $\pi$. For background information, see [Wikipedia](https://en.wikipedia.org/wiki/Monte_Carlo_method). First, a so-called registry object has to be created, which defines a directory where all relevant information, files and results of the computational jobs will be stored. There are two different types of registry objects: First, a regular `?Registry` which we will use in this example. Second, an `?ExperimentRegistry` which provides an alternative way to define computational jobs and thereby is tailored for a broad range of large scale computer experiments. Here, we use a temporary registry which is stored in the temp directory of the system and gets automatically deleted if you close the R session. ```{r, message = FALSE} reg = makeRegistry(file.dir = NA, seed = 1) ``` For a permanent registry, set the `file.dir` to a valid path.
It can then be reused later, e.g., when you log in to the system again, by calling the function `loadRegistry(file.dir)`. When a registry object is created or loaded, it is stored for the active R session as the default. Therefore the argument `reg` will be omitted in the function calls of this example, assuming the correct registry is set as default. To get the current default registry, `getDefaultRegistry()` can be used. To switch to another registry, use `setDefaultRegistry()`. First, we create a function which samples $n$ points $(x_i, y_i)$ where $x_i$ and $y_i$ are distributed uniformly, i.e. $x_i, y_i \sim \mathcal{U}(0,1)$. Next, the distance to the origin $(0, 0)$ is calculated and four times the fraction of points in the unit circle ($d \leq 1$), which approximates $\pi$, is returned. ```{r} piApprox = function(n) { nums = matrix(runif(2 * n), ncol = 2) d = sqrt(nums[, 1]^2 + nums[, 2]^2) 4 * mean(d <= 1) } set.seed(42) piApprox(1000) ``` We now parallelize `piApprox()` with `batchtools`: We create 10 jobs, each doing a MC simulation with $10^5$ points. We use `batchMap()` to define the jobs (note that this does not yet start the calculation): ```{r} batchMap(fun = piApprox, n = rep(1e5, 10)) ``` The length of the vector or list defines how many different jobs are created, while the elements themselves are used as arguments for the function. The function `batchMap(fun, ...)` works analogously to `Map(f, ...)` of the base package. An overview over the jobs and their IDs can be retrieved with `getJobTable()` which returns a data.frame with all relevant information: ```{r} names(getJobTable()) ``` Note that a unique job ID is assigned to each job. These IDs can be used to restrict operations to subsets of jobs. To actually start the calculation, call `submitJobs()`. The registry and the selected job IDs can be taken as arguments as well as an arbitrary list of resource requirements, which are to be handled by the cluster back end.
```{r} submitJobs(resources = list(walltime = 3600, memory = 1024)) ``` In this example, caps for the execution time (so-called walltime) and for the maximum memory requirements are set. The progress of the submitted jobs can be checked with `getStatus()`. ```{r} getStatus() ``` The resulting output includes the number of jobs in the registry, how many have been submitted, have started to execute on the batch system, are currently running, have successfully completed, and have terminated due to an R exception. After jobs have successfully terminated, we can load their results on the master. This can be done in a simple fashion by using either `loadResult()`, which returns a single result exactly in the form it was calculated during mapping, or by using `reduceResults()`, which is a version of `Reduce()` from the base package for registry objects. ```{r} waitForJobs() mean(sapply(1:10, loadResult)) reduceResults(function(x, y) x + y) / 10 ``` If you are absolutely sure that your function works, you can take a shortcut and use *batchtools* in an `lapply` fashion using `btlapply()`. This function creates a temporary registry (but you may also pass one yourself), calls `batchMap()`, waits for the jobs to terminate with `waitForJobs()` and then uses `reduceResultsList()` to return the results. ```{r, R.options=list(batchtools.verbose=FALSE)} res = btlapply(rep(1e5, 10), piApprox) mean(unlist(res)) ``` # Example 2: Machine Learning We stick to a rather simple, but not unrealistic example to explain some further functionalities: Applying two classification learners to the famous iris data set (Anderson 1935), varying a few hyperparameters and evaluating the effect on the classification performance. First, we create a registry, the central meta-data object which records technical details and the setup of the experiments. We use an `?ExperimentRegistry` where the job definition is split into creating problems and algorithms.
See the paper on [BatchJobs and BatchExperiments](https://www.jstatsoft.org/article/view/v064i11) for a detailed explanation. Again, we use a temporary registry and make it the default registry. ```{r, message = FALSE} library(batchtools) reg = makeExperimentRegistry(file.dir = NA, seed = 1) ``` ## Problems and Algorithms By adding a problem to the registry, we can define the data on which certain computational jobs shall work. This can be a matrix, data frame or array that always stays the same for all subsequent experiments. But it can also be of a more dynamic nature, e.g., subsamples of a dataset or random numbers drawn from a probability distribution. Therefore the function `addProblem()` accepts static parts in its `data` argument, which is passed to the argument `fun` which generates a (possibly stochastic) problem instance. For `data`, any R object can be used. If only `data` is given, the generated instance is `data`. The argument `fun` has to be a function with the arguments `data` and `job` (and optionally other arbitrary parameters). The argument `job` is an object of type `?Job` which holds additional information about the job. We want to split the iris data set into a training set and test set. In this example we use subsampling which just randomly takes a fraction of the observations as training set. We define a problem function which returns the indices of the respective training and test set for a split with `100 * ratio`% of the observations being in the training set: ```{r} subsample = function(data, job, ratio, ...) { n = nrow(data) train = sample(n, floor(n * ratio)) test = setdiff(seq_len(n), train) list(test = test, train = train) } ``` `addProblem()` writes the problem to the file system and the problem gets recorded in the registry. ```{r} data("iris", package = "datasets") addProblem(name = "iris", data = iris, fun = subsample, seed = 42) ``` The function call will be evaluated at a later stage on the workers.
In this process, the `data` part will be loaded and passed to the function. Note that we set a problem seed to synchronize the experiments in the sense that the same resampled training and test sets are used for the algorithm comparison in each distinct replication. The algorithms for the jobs are added to the registry in a similar manner. When using `addAlgorithm()`, an identifier as well as the algorithm to apply to are required arguments. The algorithm must be given as a function with arguments `job`, `data` and `instance`. Further arbitrary arguments (e.g., hyperparameters or strategy parameters) may be defined analogously as for the function in `addProblem`. The objects passed to the function via `job` and `data` are here the same as above, while via `instance` the return value of the evaluated problem function is passed. The algorithm can return any R object which will automatically be stored on the file system for later retrieval. Firstly, we create an algorithm which applies a support vector machine: ```{r} svm.wrapper = function(data, job, instance, ...) { library("e1071") mod = svm(Species ~ ., data = data[instance$train, ], ...) pred = predict(mod, newdata = data[instance$test, ], type = "class") table(data$Species[instance$test], pred) } addAlgorithm(name = "svm", fun = svm.wrapper) ``` Secondly, a random forest of classification trees: ```{r} forest.wrapper = function(data, job, instance, ...) { library("ranger") mod = ranger(Species ~ ., data = data[instance$train, ], write.forest = TRUE) pred = predict(mod, data = data[instance$test, ]) table(data$Species[instance$test], pred$predictions) } addAlgorithm(name = "forest", fun = forest.wrapper) ``` Both algorithms return a confusion matrix for the predictions on the test set, which will later be used to calculate the misclassification rate. Note that using the `...` argument in the wrapper definitions allows us to circumvent naming specific design parameters for now. 
This is an advantage if we later want to extend the set of algorithm parameters in the experiment. The algorithms get recorded in the registry and the corresponding functions are stored on the file system. Defined problems and algorithms can be queried with: ```{r} reg$problems reg$algorithms ``` The flow to define experiments is summarized in the following figure: ```{r,echo=FALSE} knitr::include_graphics("tikz_prob_algo_simple.png", auto_pdf = TRUE) ``` ## Creating jobs `addExperiments()` is used to parametrize the jobs and thereby define computational jobs. To do so, you have to pass named lists of parameters to `addExperiments()`. The elements of the respective list (one for problems and one for algorithms) must be named after the problem or algorithm they refer to. The data frames contain parameter constellations for the problem or algorithm function where columns must have the same names as the target arguments. When the problem design and the algorithm design are combined in `addExperiments()`, each combination of the parameter sets of the two designs defines a distinct job. How often each of these jobs should be computed can be determined with the argument `repls`. ```{r} # problem design: try two values for the ratio parameter pdes = list(iris = data.table(ratio = c(0.67, 0.9))) # algorithm design: try combinations of kernel and epsilon exhaustively, # try different number of trees for the forest ades = list( svm = CJ(kernel = c("linear", "polynomial", "radial"), epsilon = c(0.01, 0.1)), forest = data.table(ntree = c(100, 500, 1000)) ) addExperiments(pdes, ades, repls = 5) ``` The jobs are now available in the registry with an individual job ID for each. The function `summarizeExperiments()` returns a table which gives a quick overview over all defined experiments. 
```{r} summarizeExperiments() summarizeExperiments(by = c("problem", "algorithm", "ratio")) ``` ## Before Submitting Before submitting all jobs to the batch system, we encourage you to test each algorithm individually. Or sometimes you want to submit only a subset of experiments because the jobs vastly differ in runtime. Another reoccurring task is the collection of results for only a subset of experiments. For all these use cases, `findExperiments()` can be employed to conveniently select a particular subset of jobs. It returns the IDs of all experiments that match the given criteria. Your selection can depend on substring matches of problem or algorithm IDs using `prob.name` or `algo.name`, respectively. You can also pass R expressions, which will be evaluated in your problem parameter setting (`prob.pars`) or algorithm parameter setting (`algo.pars`). The expression is then expected to evaluate to a Boolean value. Furthermore, you can restrict the experiments to specific replication numbers. To illustrate `findExperiments()`, we will select two experiments, one with a support vector machine and the other with a random forest and the parameter `ntree = 1000`. The selected experiment IDs are then passed to testJob. ```{r} id1 = head(findExperiments(algo.name = "svm"), 1) print(id1) id2 = head(findExperiments(algo.name = "forest", algo.pars = (ntree == 1000)), 1) print(id2) testJob(id = id1) testJob(id = id2) ``` If something goes wrong, `batchtools` comes with a bunch of useful debugging utilities (see separate vignette on error handling). If everything turns out fine, we can proceed with the calculation. ## Submitting and Collecting Results To submit the jobs, we call `submitJobs()` and wait for all jobs to terminate using `waitForJobs()`. 
```{r} submitJobs() waitForJobs() ``` After jobs are finished, the results can be collected with `reduceResultsDataTable()` where we directly extract the mean misclassification error: ```{r} reduce = function(res) list(mce = (sum(res) - sum(diag(res))) / sum(res)) results = unwrap(reduceResultsDataTable(fun = reduce)) head(results) ``` Next, we merge the results table with the table of job parameters using one of the join helpers (see `?JoinTables`) provided by `batchtools` (here, we use an inner join): ```{r} pars = unwrap(getJobPars()) tab = ijoin(pars, results) head(tab) ``` We now aggregate the results group-wise. You can use [`data.table`](https://cran.r-project.org/package=data.table), `base::aggregate()`, or the [`dplyr`](https://cran.r-project.org/package=dplyr) package for this purpose. Here, we use [`data.table`](https://cran.r-project.org/package=data.table) to subset the table to jobs where the ratio is `0.67` and group by algorithm and the algorithm hyperparameters: ```{r} tab[ratio == 0.67, list(mmce = mean(mce)), by = c("algorithm", "kernel", "epsilon", "ntree")] ``` # Example: Error Handling In any large scale experiment many things can and will go wrong. The cluster might have an outage, jobs may run into resource limits or crash, subtle bugs in your code could be triggered or any other error condition might arise. In these situations it is important to quickly determine what went wrong and to recompute only the minimal number of required jobs. Therefore, before you submit anything you should use `testJob()` to catch errors that are easy to spot because they are raised in many or all jobs. If `external` is set, this function runs the job without side effects in an independent R process on your local machine via `Rscript`, similar to the execution on the slave, redirects the output of the process to your R console, loads the job result and returns it.
If you do not set `external`, the job is executed in the currently running R session, with the drawback that you might be unable to catch missing variable declarations or missing package dependencies. By way of illustration here is a small example. First, we create a temporary registry. ```{r, message = FALSE} library(batchtools) reg = makeRegistry(file.dir = NA, seed = 1) ``` Ten jobs are created, one will throw a warning and two of them will raise an exception. ```{r} flakeyFunction <- function(value) { if (value == 5) warning("Just a simple warning") if (value %in% c(2, 9)) stop("Ooops.") value^2 } batchMap(flakeyFunction, 1:10) ``` Now that the jobs are defined, we can test jobs independently: ```{r} testJob(id = 1) ``` In this case, testing the job with ID = 1 provides the appropriate result but testing the job with ID = 2 leads to an error: ```{r} as.character(try(testJob(id = 2))) ``` We ignore the error here, and just assume everything looks fine and submit all jobs. ```{r} submitJobs() waitForJobs() ``` After you have submitted jobs and suspect that something is going wrong, the first thing to do is to run `getStatus()` to display a summary of the current state of the system. ```{r} getStatus() ``` The status message shows that two of the jobs could not be executed successfully. To get the IDs of all jobs that failed due to an error we can use `findErrors()` and to retrieve the actual error message, we can use `getErrorMessages()`. ```{r} findErrors() getErrorMessages() ``` If we want to peek into the R log file of a job to see more context for the error we can use `showLog()` which opens a pager or use `getLog()` to get the log as character vector: ```{r} tail(getLog(id = 9)) ``` You can also grep for messages (output suppressed in this vignette for technical reasons): ```{r,eval=FALSE} grepLogs(pattern = "simple", ignore.case = TRUE) ``` # Workflow ## On the Local System 1.
Create a Registry with `makeRegistry()` / `makeExperimentRegistry()` or load an existing one from the file system with `loadRegistry()`. 2. Define computational jobs with `batchMap()` / `batchReduce()` if you used `makeRegistry()` or define an experiment with `addAlgorithm()`, `addProblem()` and `addExperiments()` if you started with `makeExperimentRegistry()`. It is advised to test some jobs with `testJob()` in the interactive session and with `testJob(external = TRUE)` in a separate R process. Note that you can add additional jobs if you are using an `?ExperimentRegistry`. 3. If required, query the data base for job ids depending on their status, parameters or tags (see `findJobs()`). The returned tables can easily be combined in a set-like fashion with data base verbs: union (`ojoin()` for outer join), intersect (`ijoin()` for inner join), difference (`ajoin()` for anti join). 4. Submit jobs with `submitJobs()`. You can specify job resources here. If you have thousands of fast terminating jobs, you want to `chunk()` them first. If some jobs already terminated, you can estimate the runtimes with `estimateRuntimes()` and chunk jobs into heterogeneous groups with `lpt()` and `binpack()`. 5. Monitor jobs. `getStatus()` gives a summarizing overview. Use `showLog()` and `grepLogs()` to investigate log files. Run jobs in the currently running session with `testJob()` to get a `traceback()`. 6. Collect (partial) results. `loadResult()` retrieves a single result from the file system. `reduceResults()` mimics `Reduce()` and allows you to apply a function to many files in an iterative fashion. `reduceResultsList()` and `reduceResultsDataTable()` collect results into a `list` or `data.table`, respectively. ```{r,echo=FALSE} knitr::include_graphics("function_overview.png", auto_pdf = TRUE) ``` ## On Multiple Systems Most users develop and prototype their experiments on a desktop box in their preferred IDE and later deploy to a large computing cluster.
This can be done by prototyping locally (`testJob()`) or by submitting subsets via `submitJobs()`. To deploy to the cluster, just copy the file directory (as reported by `reg$file.dir`) to the remote system. Next, log in on the cluster (typically via `ssh`), `cd` to the copied directory and call `loadRegistry(file.dir, work.dir, writeable = TRUE)`. This function will (a) source the local configuration file so that you can talk to the cluster (verify by checking the output of `reg$cluster.functions`) and (b) adjust the paths to the new system if argument `update.paths` is set. After loading the Registry, it is advised to test some jobs again with `testJob()` before submitting all of them with `submitJobs(resources = list())` (remember you now need to set resources!). After some jobs are finished, the `file.dir` can be copied back (do not merge with the previous directory!) and loaded again with `loadRegistry()`. This approach is totally viable as long as some general rules are followed: 1. Make sure you have all packages installed. Package versions can be synchronized across machines with, e.g., [`checkpoint`](https://cran.r-project.org/package=checkpoint) or [`packrat`](https://cran.r-project.org/package=packrat). 2. Test jobs on the remote system prior to submitting to ensure that paths are resolved correctly. 3. Make sure you have set the cluster functions in a configuration file, and stick to one backend as long as jobs are running. 4. The status can only be monitored on the remote system (for obvious reasons). 5. Partial results can be inspected both on the remote system and on the local system. For the latter, you need to copy over the **complete** `file.dir` first. Overwriting/merging directories is not advised as this may lead to inconsistencies if you added or removed experiments on the remote. If you have to merge, use `rsync` with option `--delete`. Load the registry locally with `loadRegistry()` and collect results. Do not copy back and forth. 6.
Avoid accessing the `file.dir` with multiple sessions simultaneously. This includes accessing the registry via a mount! Simultaneous access may lead to inconsistencies and missing results.

================================================
FILE: vignettes/function_overview.tex
================================================
% Function overview figure: a grid of boxed function names. Columns are
% "Regular Registry", "Common" and "Experiment Registry"; rows are the six
% workflow steps "(1) Create Registry" through "(6) Collect Results".
% NOTE(review): presumably the source of function_overview.png used by the
% vignette -- confirm against the build setup.
\documentclass[crop,tikz,convert]{standalone}
\usetikzlibrary{shapes,matrix,positioning,chains,arrows,shadows,decorations.pathmorphing,fit,backgrounds}
\begin{document}
\begin{tikzpicture}[auto]
  % Styles local to this picture.
  \tikzset{
    % Boxed cell holding one or more function names (monospace).
    box/.style={rectangle, drop shadow, draw=black, fill=white, thick, minimum width=4cm, rounded corners, align=center,font=\ttfamily\large},
    % Column heading.
    chead/.style={font=\large\bfseries},
    % Row heading: same font as a column heading, left aligned, fixed width.
    rhead/.style={chead,align=left, minimum width=4cm},
    % Light gray backdrop fitted around a group of nodes.
    bg/.style={rectangle, fill=gray!10, inner sep=0.2cm, rounded corners=5mm},
    % Red outline; defined here but not applied to any node in this figure.
    hl/.style={rectangle, draw=red, inner sep=0.2cm, rounded corners=5mm},
  }
  %
  % One matrix row per workflow step; cells are separated explicitly with
  % \pgfmatrixnextcell. Empty nodes keep the grid aligned.
  \matrix [row sep=10mm, column sep=5mm] (mat) {
    % Header row
    \node (chead0) [minimum width=4cm] {}; \pgfmatrixnextcell
    \node (chead1) [chead] {Regular Registry}; \pgfmatrixnextcell
    \node (chead2) [chead] {Common}; \pgfmatrixnextcell
    \node (chead3) [chead] {Experiment Registry}; \\
    % (1)
    \node (registry0) [rhead] {(1) Create Registry}; \pgfmatrixnextcell
    \node (registry1) [box] {makeRegistry}; \pgfmatrixnextcell
    \node (registry2) {}; \pgfmatrixnextcell
    \node (registry3) [box] {makeExperimentRegistry}; \\
    % (2)
    \node (define0) [rhead] {(2) Define Jobs}; \pgfmatrixnextcell
    \node (define1) [box] {batchMap \\ batchReduce}; \pgfmatrixnextcell
    \node (define2) [box] {batchMapResults}; \pgfmatrixnextcell
    \node (define3) [box] {addProblem \\ addAlgorithm \\ addExperiments}; \\
    % (3)
    \node (subsetting0) [rhead] {(3) Subset Jobs}; \pgfmatrixnextcell
    \node (subsetting1) [box] {findJobs}; \pgfmatrixnextcell
    \node (subsetting2) [box] {findDone\\ findErrors \\\ldots}; \pgfmatrixnextcell
    \node (subsetting3) [box] {findExperiments}; \\
    % (4)
    \node (submit0) [rhead] {(4) Submit Jobs}; \pgfmatrixnextcell
    \node (submit1) {}; \pgfmatrixnextcell
    \node (submit2) [box] {submitJobs}; \pgfmatrixnextcell
    \node (submit3) {}; \\
    % (5)
    \node (status0) [rhead] {(5) Monitor \& Debug}; \pgfmatrixnextcell
    \node (status1) {}; \pgfmatrixnextcell
    \node (status2) [box] {getStatus \\ testJob \\ showLog \\ grepLogs}; \pgfmatrixnextcell
    \node (status3) [box] {summarizeExperiments}; \\
    % (6)
    \node (collect0) [rhead] {(6) Collect Results}; \pgfmatrixnextcell
    \node (collect1) {}; \pgfmatrixnextcell
    \node (collect2) [box] {loadResult \\ reduceResults \\ reduceResults[List|DataTable]}; \pgfmatrixnextcell
    \node (collect3) {}; \\
  };
  %
  % Gray backdrops drawn on the background layer: one fitted around the
  % row-heading column, one fitted around the header row.
  \begin{pgfonlayer}{background}
    \node [bg, fit=(chead0) (collect0)] {};
    \node [bg, fit=(chead0) (chead3)] {};
  \end{pgfonlayer}
\end{tikzpicture}
\end{document}

================================================
FILE: vignettes/tikz_prob_algo_simple.tex
================================================
% Diagram relating the static problem part, the dynamic problem function,
% the algorithm function, the two designs passed via addExperiments, and
% the resulting "result" node.
\documentclass[crop,tikz,convert]{standalone}
\usetikzlibrary{shapes,matrix,positioning,chains,arrows,shadows,decorations.pathmorphing,fit,backgrounds}
\begin{document}
\begin{tikzpicture}[auto]
  % Styles local to this picture.
  \tikzset{
    % Gray-filled rectangle.
    userinput/.style={rectangle, drop shadow, draw=black, fill=black!10, thick, minimum width=4cm, align=center},
    % White rounded rectangle; defined here but not applied to any node below.
    internal/.style={rectangle, drop shadow, draw=black, fill=white, thick, minimum width=4cm, rounded corners, align=center},
    % White ellipse.
    result/.style={ellipse, drop shadow, draw=black, fill=white, thick, align=center, minimum width=3cm},
    % Straight arrow.
    line/.style={draw, thick, -latex'},
    % Snake-decorated arrow.
    sline/.style={draw, thick, -latex',decorate, decoration={snake, segment length=2mm,post length=2mm}},
  }
  %
  \matrix [row sep=10mm, column sep=20mm] {
    % Row 1: only the result node, in the third column.
    \node {}; &
    \node {}; &
    \node [result] (result) {result}; \\
    % Row 2: static data, dynamic problem function, algorithm function.
    \node [userinput] (static_problem_part) { static problem part\\\texttt{data} }; &
    \node [userinput] (dynamic_problem_part) { dynamic problem function\\ \texttt{fun(data, ...)} }; &
    \node [userinput] (algorithm) { algorithm function\\ \texttt{fun(data, instance, ...)} }; \\
    % Row 3: the two designs, below the functions they point to.
    \node {}; &
    \node [userinput] (problem_design) { problem design\\ (\texttt{addExperiments}) }; &
    \node [userinput] (algorithm_design) { algorithm design\\ (\texttt{addExperiments}) }; \\
  };
  %
  % Edges. Snake arrows: algorithm -> result, and the edge labelled
  % "instance" from the dynamic problem function to the algorithm.
  \draw [sline] (algorithm) to (result) ;
  \draw [line] (static_problem_part) to (dynamic_problem_part);
  \draw [sline] (dynamic_problem_part) to node {\texttt{instance}} (algorithm) ;
  % Bent edge from the static part directly to the algorithm.
  \draw [line] (static_problem_part) to [out=0, in=0, bend left=20] (algorithm);
  % Each design feeds its function; both edges are labelled "...".
  \draw [line] (problem_design) to node {\texttt{...}} (dynamic_problem_part);
  \draw [line] (algorithm_design) to node {\texttt{...}} (algorithm);
\end{tikzpicture}
\end{document}