From 34b46ad026edadc535b920b4adfa2778232ca07f Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Thu, 15 May 2025 07:41:12 -0400 Subject: [PATCH 1/7] Rmd conversion --- config.yaml | 5 +- .../{10-hpc-intro.md => 10-hpc-intro.Rmd} | 54 +++++++------- episodes/11-connecting.Rmd | 20 +++--- episodes/13-hpcc-scheduler/hpcc/section2.rmd | 8 --- episodes/load_config.R | 25 ++++--- episodes/slurm_defaults.yaml | 70 +++++++++++++++++++ ...structor-notes.md => instructor-notes.Rmd} | 0 7 files changed, 122 insertions(+), 60 deletions(-) rename episodes/{10-hpc-intro.md => 10-hpc-intro.Rmd} (79%) delete mode 100644 episodes/13-hpcc-scheduler/hpcc/section2.rmd create mode 100644 episodes/slurm_defaults.yaml rename instructors/{instructor-notes.md => instructor-notes.Rmd} (100%) diff --git a/config.yaml b/config.yaml index a117e923..5aa74027 100644 --- a/config.yaml +++ b/config.yaml @@ -65,7 +65,7 @@ contact: 'team@carpentries.org' # Order of episodes in your lesson episodes: - - 10-hpc-intro.md + - 10-hpc-intro.Rmd - 11-connecting.Rmd - 12-cluster.Rmd - 13-scheduler.Rmd @@ -78,12 +78,15 @@ episodes: # Information for Learners learners: + - setup.md # Information for Instructors instructors: + - instructor-notes.Rmd # Learner Profiles profiles: + - learner-profiles.md # Customisation --------------------------------------------- # diff --git a/episodes/10-hpc-intro.md b/episodes/10-hpc-intro.Rmd similarity index 79% rename from episodes/10-hpc-intro.md rename to episodes/10-hpc-intro.Rmd index 4292cedc..6b7e88c9 100644 --- a/episodes/10-hpc-intro.md +++ b/episodes/10-hpc-intro.Rmd @@ -22,7 +22,7 @@ Frequently, research problems that use computing can outgrow the capabilities of the desktop or laptop computer where they started: - A statistics student wants to cross-validate a model. This involves running - the model 1000 times -- but each run takes an hour. Running the model on + the model 1000 times — but each run takes an hour. 
Running the model on a laptop will take over a month! In this research problem, final results are calculated after all 1000 models have run, but typically only one model is run at a time (in **serial**) on the laptop. Since each of the 1000 runs is @@ -30,7 +30,7 @@ of the desktop or laptop computer where they started: possible to run them all at once (in **parallel**). - A genomics researcher has been using small datasets of sequence data, but soon will be receiving a new type of sequencing data that is 10 times as - large. It's already challenging to open the datasets on a computer -- + large. It's already challenging to open the datasets on a computer — analyzing these larger datasets will probably crash it. In this research problem, the calculations required might be impossible to parallelize, but a computer with **more memory** would be required to analyze the much larger @@ -54,7 +54,7 @@ problems in parallel**. ## Jargon Busting Presentation -Open the [HPC Jargon Buster](../files/jargon#p1) +Open the [HPC Jargon Buster](files/jargon.html#p1) in a new tab. To present the content, press `C` to open a **c**lone in a separate window, then press `P` to toggle **p**resentation mode. @@ -71,48 +71,44 @@ results. ## Some Ideas - Checking email: your computer (possibly in your pocket) contacts a remote - machine, authenticates, and downloads a list of new messages; it also - uploads changes to message status, such as whether you read, marked as - junk, or deleted the message. Since yours is not the only account, the - mail server is probably one of many in a data center. -- Searching for a phrase online involves comparing your search term against - a massive database of all known sites, looking for matches. This "query" + machine, authenticates, and downloads a list of new messages; it also uploads + changes to message status, such as whether you read, marked as junk, or + deleted the message. 
Since yours is not the only account, the mail server is + probably one of many in a data center. +- Searching for a phrase online involves comparing your search term against a + massive database of all known sites, looking for matches. This "query" operation can be straightforward, but building that database is a [monumental task][mapreduce]! Servers are involved at every step. -- Searching for directions on a mapping website involves connecting your - (A) starting and (B) end points by [traversing a graph][dijkstra] in - search of the "shortest" path by distance, time, expense, or another - metric. Converting a map into the right form is relatively simple, but - calculating all the possible routes between A and B is expensive. +- Searching for directions on a mapping website involves connecting your (A) + starting and (B) end points by [traversing a graph][dijkstra] in search of + the "shortest" path by distance, time, expense, or another metric. Converting + a map into the right form is relatively simple, but calculating all the + possible routes between A and B is expensive. Checking email could be serial: your machine connects to one server and exchanges data. Searching by querying the database for your search term (or -endpoints) could also be serial, in that one machine receives your query -and returns the result. However, assembling and storing the full database -is far beyond the capability of any one machine. Therefore, these functions -are served in parallel by a large, ["hyperscale"][hyperscale] collection of -servers working together. - - +endpoints) could also be serial, in that one machine receives your query and +returns the result. However, assembling and storing the full database is far +beyond the capability of any one machine. Therefore, these functions are served +in parallel by a large, ["hyperscale"][hyperscale] collection of servers +working together. 
::::::::::::::::::::::::: :::::::::::::::::::::::::::::::::::::::::::::::::: - - [mapreduce]: https://en.wikipedia.org/wiki/MapReduce [dijkstra]: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm [hyperscale]: https://en.wikipedia.org/wiki/Hyperscale_computing - :::::::::::::::::::::::::::::::::::::::: keypoints -- High Performance Computing (HPC) typically involves connecting to very large computing systems elsewhere in the world. -- These other systems can be used to do work that would either be impossible or much slower on smaller systems. +- High Performance Computing (HPC) typically involves connecting to very large + computing systems elsewhere in the world. +- These other systems can be used to do work that would either be impossible or + much slower on smaller systems. - HPC resources are shared by multiple users. -- The standard method of interacting with such systems is via a command line interface. +- The standard method of interacting with such systems is via a command line + interface. :::::::::::::::::::::::::::::::::::::::::::::::::: - - diff --git a/episodes/11-connecting.Rmd b/episodes/11-connecting.Rmd index a6635d74..3ef0dc90 100644 --- a/episodes/11-connecting.Rmd +++ b/episodes/11-connecting.Rmd @@ -38,15 +38,17 @@ results. If you have ever opened the Windows Command Prompt or macOS Terminal, you have seen a CLI. If you have already taken The Carpentries' courses on the UNIX Shell or Version Control, you have used the CLI on your *local machine* -extensively. The only leap to be made here is to open a CLI on a *remote machine*, -while taking some precautions so that other folks on the network can't see (or -change) the commands you're running or the results the remote machine sends -back. We will use the Secure SHell protocol (or SSH) to open an encrypted -network connection between two machines, allowing you to send \& receive text -and data without having to worry about prying eyes. 
- -![](/fig/connect-to-remote.svg){max-width="50%" alt="Connect to cluster"} - +extensively. The only leap to be made here is to open a CLI on a *remote +machine*, while taking some precautions so that other folks on the network +can't see (or change) the commands you're running or the results the remote +machine sends back. We will use the Secure SHell protocol (or SSH) to open an +encrypted network connection between two machines, allowing you to send \& +receive text and data without having to worry about prying eyes. + +![connect-to-remote.svg](/fig/connect-to-remote.svg){ + max-width="50%" + alt="Connect to cluster. " +} SSH clients are usually command-line tools, where you provide the remote machine address as the only required argument. If your username on the remote diff --git a/episodes/13-hpcc-scheduler/hpcc/section2.rmd b/episodes/13-hpcc-scheduler/hpcc/section2.rmd deleted file mode 100644 index 48ec3cdb..00000000 --- a/episodes/13-hpcc-scheduler/hpcc/section2.rmd +++ /dev/null @@ -1,8 +0,0 @@ ---- -layout: page -author: Alan O'Cais -comment: Snippet test ---- -::: discussion -This is a second snippet that only appears in the backup. -::: diff --git a/episodes/load_config.R b/episodes/load_config.R index 872489eb..b00ec7a1 100644 --- a/episodes/load_config.R +++ b/episodes/load_config.R @@ -1,30 +1,28 @@ +## R script to chain-load lesson configuration YAML files. 
+## Top-level configuration is `/episodes/lesson_config.yaml` -# Function to merge two lists (with overrides) -merge_lists <- function(base, override) { - modifyList(base, override) -} - -# Load required package library(yaml) -# Load primary configuration + +## Load primary configuration config <- yaml.load_file("lesson_config.yaml") -# If 'config' key exists, load the second configuration and merge + +## If "main_config" key exists, load the second configuration and merge if (!is.null(config$main_config) && file.exists(config$main_config)) { override_config <- yaml.load_file(config$main_config) - config <- merge_lists(config, override_config) + config <- modifyList(config, override_config) } -snippets <- paste('files/snippets/', config$snippets, sep='') +snippets <- paste("files/snippets/", config$snippets, sep="") # Extract main and fallback paths from config -main_snippets <- config$main_snippets +main_snippets <- config$main_snippets fallback_snippets <- config$fallback_snippets # Function to choose the correct document path (or return NULL if neither exists) choose_doc <- function(child_file) { # Get the current document name (without extension) current_doc <- tools::file_path_sans_ext(knitr::current_input(dir = TRUE)) - + # Build paths for the child document inside subdirectories doc_paths <- list( main = file.path(current_doc, main_snippets, child_file), @@ -32,6 +30,7 @@ choose_doc <- function(child_file) { ) print(doc_paths) print(getwd()) + # Return the valid path, or NULL if neither exists if (file.exists(doc_paths$main)) { print("Returning") @@ -45,4 +44,4 @@ choose_doc <- function(child_file) { print("Returning NULL") return(NULL) # Return NULL if neither path exists } -} \ No newline at end of file +} diff --git a/episodes/slurm_defaults.yaml b/episodes/slurm_defaults.yaml new file mode 100644 index 00000000..fc550217 --- /dev/null +++ b/episodes/slurm_defaults.yaml @@ -0,0 +1,70 @@ +# Fail-safe defaults and implicit schema for lesson configuration files 
+--- +snippets: slurm +baseurl: "https://ocaisa.github.io/probable-pancake/" +# main_config: "lesson_config.yaml" + +# about the Learner's laptop +local: + prompt: "[you@laptop:~]$" # command-line prompt + shebang: "#!/bin/bash" # first line of every shell script + +# about the remote/cluster environment +remote: + name: "Example Cluster" # Name of the cluster (proper noun) + login: "cluster.example.com" # domain name of the login node + host: "head" # hostname of the login node + node: "node" # hostname of a compute node + location: "SchedMD" # institutional host of the cluster + homedir: "/home" # parent of home directories + user: "userid" # stand-in for the username + prompt: "[userid@head:~]" # command-line prompt + prompt_work: "[userid@head:/work/userid]" # prompt under /work + module_python3: "Python" # name of the module providing py3 + shebang: "#!/bin/bash" # first line of every shell script + +# Commands & flags for the scheduler environment +sched: + name: "Slurm" # proper name of the scheduler + command: + batch: "sbatch" # run later + interactive: "srun" # run now + cancel: "scancel" # don't run + queue: + test: "debug" + prod: "batch" + status: "squeue" + flag: + user: "-u userid" + interactive: "--pty bash" + histdetail: "-l -j" + name: "-J" + time: "-t" + queue: "-p" + nodes: "-N" + tasks: "-n" + del: "scancel" + interactive: + command: "srun" + info: + command: "sinfo" + comment: "#SBATCH" + hist: "sacct -u userid" + hist_filter: "" + reservation: "" + qos: "" + budget: "" + project: "" + +# submit: +# salloc: obtain a job allocation +# sbatch: submit a batch script for later execution +# srun: obtain an allocation and execute an application +# account: +# sacct: display accounting data +# manage: +# sbcast: transfer a file to a job's compute nodes +# scancel: signal jobs/steps +# squeue: view information about jobs +# sinfo: view information about nodes & partitions +# scontrol: view & modify state diff --git a/instructors/instructor-notes.md 
b/instructors/instructor-notes.Rmd similarity index 100% rename from instructors/instructor-notes.md rename to instructors/instructor-notes.Rmd From c287cd7d1cba22443e8e3ae4ac198c06612bca2f Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Thu, 21 Aug 2025 16:26:24 -0400 Subject: [PATCH 2/7] Initialize a testing snippet library with clearly-wrong data --- .../Ghastly_Mistakes/_config_options.yml | 65 ++++++++++++++ .../Ghastly_Mistakes/cluster/queue-info.Rmd | 5 ++ .../cluster/specific-node-info.Rmd | 12 +++ .../modules/available-modules.snip | 21 +++++ .../modules/default-modules.snip | 4 + .../modules/missing-python.snip | 33 +++++++ .../modules/module-load-python.snip | 5 ++ .../modules/python-executable-dir.snip | 4 + .../modules/python-ls-dir-command.snip | 4 + .../modules/python-ls-dir-output.snip | 16 ++++ .../modules/python-module-path.snip | 4 + .../modules/software-dependencies.snip | 87 +++++++++++++++++++ .../modules/wrong-gcc-version.snip | 5 ++ .../parallel/eight-tasks-jobscript.snip | 16 ++++ .../parallel/four-tasks-jobscript.snip | 16 ++++ .../parallel/one-task-jobscript.snip | 14 +++ .../resources/account-history.snip | 14 +++ .../resources/monitor-processes-top.snip | 19 ++++ .../resources/system-memory-free.snip | 6 ++ .../scheduler/basic-job-script.Rmd | 3 + .../scheduler/basic-job-status.Rmd | 8 ++ .../scheduler/job-with-name-status.Rmd | 4 + .../scheduler/option-flags-list.Rmd | 17 ++++ .../scheduler/print-sched-variables.Rmd | 32 +++++++ .../scheduler/runtime-exceeded-job.Rmd | 3 + .../scheduler/runtime-exceeded-output.Rmd | 5 ++ .../scheduler/terminate-job-begin.Rmd | 6 ++ .../scheduler/terminate-job-cancel.Rmd | 3 + .../scheduler/terminate-multiple-jobs.Rmd | 29 +++++++ .../scheduler/using-nodes-interactively.Rmd | 68 +++++++++++++++ .../filezilla-ssh-tunnel-instructions.snip | 0 31 files changed, 528 insertions(+) create mode 100644 episodes/files/snippets/Ghastly_Mistakes/_config_options.yml create mode 100644 
episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-script.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd create mode 100644 
episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-job.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-cancel.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-multiple-jobs.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd create mode 100644 episodes/files/snippets/Ghastly_Mistakes/transferring-files/filezilla-ssh-tunnel-instructions.snip diff --git a/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml b/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml new file mode 100644 index 00000000..76eb0075 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml @@ -0,0 +1,65 @@ +# ------------------------ +# Ghastly Mistakes Cluster +# ------------------------ +# +# This is not a real cluster, and its options are meant to +# highlight configuration changes while we develop the config +# chain-loader, per @tobyhodges' suggestion. +# +# Compute irresponsibly. 
+--- + +snippets: "/snippets_library/Ghastly_Mistakes" + +local: + prompt: "luigi@mushroomkingdom:~$" + bash_shebang: "#!/bin/bash" + +remote: + name: "Bowser's Castle" + login: "castle.bowser.org" + portal: "https://pipe.bowser.org" + host: "castle" + node: "turtle" + location: "World 8-4" + homedir: "/darkland" + user: "luigi" + module_python3: "Boa" + prompt: "luigi@castle:~$" + bash_shebang: "#!/bin/no" + +sched: + name: "Slurp" + submit: + name: "smash" + options: "" + queue: + debug: "doom" + testing: "gloom" + status: "skew" + flag: + user: "-u luigi" + interactive: "" + histdetail: "-l -j" + name: "-J" + time: "-t" + queue: "-p" + del: "scandle" + interactive: "spun" + info: "sink" + comment: "#SMASH" + hist: "sacked -u luigi" + hist_filter: "" + +episode_order: + - 10-hpc-intro + - 11-connecting + - 12-cluster + - 13-scheduler + - 14-environment-variables + - 15-modules + - 16-transferring-files + - 17-parallel + - 18-resources + - 19-responsibility + \ No newline at end of file diff --git a/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd b/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd new file mode 100644 index 00000000..29f69030 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd @@ -0,0 +1,5 @@ +```output +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +doom up infinite 2 idle doo[1-2] +gloom up infinite 2 idle glo[1-2] +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd b/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd new file mode 100644 index 00000000..c38b377b --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd @@ -0,0 +1,12 @@ +::: challenge + +## Explore a Worker Node + +Finally, let's look at the resources available on the worker nodes +where your jobs will actually run. 
Try running this command to see +the name, CPUs and memory available on one of the worker nodes: + +```bash +`r config$remote$prompt` `r config$sched$info` -o "%n %c %m" | column -t +``` +::: diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip new file mode 100644 index 00000000..f6f3f50b --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip @@ -0,0 +1,21 @@ +``` +~~~ /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/modules/all ~~~ + Bazel/3.6.0-GCCcore-x.y.z NSS/3.51-GCCcore-x.y.z + Bison/3.5.3-GCCcore-x.y.z Ninja/1.10.0-GCCcore-x.y.z + Boost/1.72.0-gompi-2020a OSU-Micro-Benchmarks/5.6.3-gompi-2020a + CGAL/4.14.3-gompi-2020a-Python-3.x.y OpenBLAS/0.3.9-GCC-x.y.z + CMake/3.16.4-GCCcore-x.y.z OpenFOAM/v2006-foss-2020a + +[removed most of the output here for clarity] + + Where: + L: Module is loaded + Aliases: Aliases exist: foo/1.2.3 (1.2) means that "module load foo/1.2" + will load foo/1.2.3 + D: Default Module + +Use "module spider" to find all possible modules and extensions. +Use "module keyword key1 key2 ..." to search for all possible modules matching +any of the "keys". +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip new file mode 100644 index 00000000..a448dd96 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip @@ -0,0 +1,4 @@ +``` +No Modulefiles Currently Loaded. 
+``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip new file mode 100644 index 00000000..89039d32 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip @@ -0,0 +1,33 @@ +If the `python3` command was unavailable, we would see output like + +``` +/usr/bin/which: no python3 in (/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/{{site.remote.user}}/.local/bin:/home/{{site.remote.user}}/bin) +``` +{: .output} + +Note that this wall of text is really a list, with values separated +by the `:` character. The output is telling us that the `which` command +searched the following directories for `python3`, without success: + +``` +/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin +/opt/software/slurm/bin +/usr/local/bin +/usr/bin +/usr/local/sbin +/usr/sbin +/opt/puppetlabs/bin +/home/{{site.remote.user}}/.local/bin +/home/{{site.remote.user}}/bin +``` +{: .output} + +However, in our case we do have an existing `python3` available so we see + +``` +/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin/python3 +``` +{: .output} + +We need a different Python than the system provided one though, so let us load +a module to access it. 
diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip new file mode 100644 index 00000000..d9bab7b4 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip @@ -0,0 +1,5 @@ +``` +{{ site.remote.prompt }} module load {{ site.remote.module_python3 }} +{{ site.remote.prompt }} which python3 +``` +{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip new file mode 100644 index 00000000..46dec092 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip @@ -0,0 +1,4 @@ +``` +/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin/python3 +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip new file mode 100644 index 00000000..80319d0a --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip @@ -0,0 +1,4 @@ +``` +{{ site.remote.prompt }} ls /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin +``` +{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip new file mode 100644 index 00000000..01d010ba --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip @@ -0,0 +1,16 @@ +``` +2to3 nosetests-3.8 python rst2s5.py +2to3-3.8 pasteurize python3 rst2xetex.py +chardetect pbr python3.8 rst2xml.py +cygdb pip python3.8-config rstpep2html.py +cython pip3 python3-config runxlrd.py +cythonize pip3.8 rst2html4.py sphinx-apidoc +easy_install pybabel 
rst2html5.py sphinx-autogen +easy_install-3.8 __pycache__ rst2html.py sphinx-build +futurize pydoc3 rst2latex.py sphinx-quickstart +idle3 pydoc3.8 rst2man.py tabulate +idle3.8 pygmentize rst2odt_prepstyles.py virtualenv +netaddr pytest rst2odt.py wheel +nosetests py.test rst2pseudoxml.py +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip new file mode 100644 index 00000000..68e97df1 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip @@ -0,0 +1,4 @@ +``` +/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/SQLite/3.31.1-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Tcl/8.6.10-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/GCCcore/x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/user01/.local/bin:/home/user01/bin +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip new file mode 100644 index 00000000..fe107f2e --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip @@ -0,0 +1,87 @@ +To demonstrate, let's use `module list`. `module list` shows all loaded +software modules. 
+ +``` +{{ site.remote.prompt }} module list +``` +{: .language-bash} + +``` +Currently Loaded Modules: + 1) GCCcore/x.y.z 4) GMP/6.2.0-GCCcore-x.y.z + 2) Tcl/8.6.10-GCCcore-x.y.z 5) libffi/3.3-GCCcore-x.y.z + 3) SQLite/3.31.1-GCCcore-x.y.z 6) Python/3.x.y-GCCcore-x.y.z +``` +{: .output} + +``` +{{ site.remote.prompt }} module load GROMACS +{{ site.remote.prompt }} module list +``` +{: .language-bash} + +``` +Currently Loaded Modules: + 1) GCCcore/x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z + 2) Tcl/8.6.10-GCCcore-x.y.z 15) PMIx/3.1.5-GCCcore-x.y.z + 3) SQLite/3.31.1-GCCcore-x.y.z 16) OpenMPI/4.0.3-GCC-x.y.z + 4) GMP/6.2.0-GCCcore-x.y.z 17) OpenBLAS/0.3.9-GCC-x.y.z + 5) libffi/3.3-GCCcore-x.y.z 18) gompi/2020a + 6) Python/3.x.y-GCCcore-x.y.z 19) FFTW/3.3.8-gompi-2020a + 7) GCC/x.y.z 20) ScaLAPACK/2.1.0-gompi-2020a + 8) numactl/2.0.13-GCCcore-x.y.z 21) foss/2020a + 9) libxml2/2.9.10-GCCcore-x.y.z 22) pybind11/2.4.3-GCCcore-x.y.z-Pytho... + 10) libpciaccess/0.16-GCCcore-x.y.z 23) SciPy-bundle/2020.03-foss-2020a-Py... + 11) hwloc/2.2.0-GCCcore-x.y.z 24) networkx/2.4-foss-2020a-Python-3.8... + 12) libevent/2.1.11-GCCcore-x.y.z 25) GROMACS/2020.1-foss-2020a-Python-3... + 13) UCX/1.8.0-GCCcore-x.y.z +``` +{: .output} + +So in this case, loading the `GROMACS` module (a molecular dynamics software +package) also loaded `OpenMPI/4.0.3-GCC-x.y.z` and +`SciPy-bundle/2020.03-foss-2020a-Python-3.x.y`. Let's try unloading the +`GROMACS` package. 
+ +``` +{{ site.remote.prompt }} module unload GROMACS +{{ site.remote.prompt }} module list +``` +{: .language-bash} + +``` +Currently Loaded Modules: + 1) GCCcore/x.y.z 13) UCX/1.8.0-GCCcore-x.y.z + 2) Tcl/8.6.10-GCCcore-x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z + 3) SQLite/3.31.1-GCCcore-x.y.z 15) PMIx/3.1.5-GCCcore-x.y.z + 4) GMP/6.2.0-GCCcore-x.y.z 16) OpenMPI/4.0.3-GCC-x.y.z + 5) libffi/3.3-GCCcore-x.y.z 17) OpenBLAS/0.3.9-GCC-x.y.z + 6) Python/3.x.y-GCCcore-x.y.z 18) gompi/2020a + 7) GCC/x.y.z 19) FFTW/3.3.8-gompi-2020a + 8) numactl/2.0.13-GCCcore-x.y.z 20) ScaLAPACK/2.1.0-gompi-2020a + 9) libxml2/2.9.10-GCCcore-x.y.z 21) foss/2020a + 10) libpciaccess/0.16-GCCcore-x.y.z 22) pybind11/2.4.3-GCCcore-x.y.z-Pytho... + 11) hwloc/2.2.0-GCCcore-x.y.z 23) SciPy-bundle/2020.03-foss-2020a-Py... + 12) libevent/2.1.11-GCCcore-x.y.z 24) networkx/2.4-foss-2020a-Python-3.x.y +``` +{: .output} + +So using `module unload` "un-loads" a module, and depending on how a site is + configured it may also unload all of the dependencies (in our case it does + not). If we wanted to unload everything at once, we could run `module purge` + (unloads everything). + +``` +{{ site.remote.prompt }} module purge +{{ site.remote.prompt }} module list +``` +{: .language-bash} + +``` +No modules loaded +``` +{: .output} + +Note that `module purge` is informative. It will also let us know if a default +set of "sticky" packages cannot be unloaded (and how to actually unload these +if we truly so desired). 
diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip new file mode 100644 index 00000000..8fbd2825 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip @@ -0,0 +1,5 @@ + diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip b/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip new file mode 100644 index 00000000..2f643071 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip @@ -0,0 +1,16 @@ +``` +{{ site.remote.bash_shebang }} +{{ site.sched.comment }} {{ site.sched.flag.name }} parallel-job +{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} +{{ site.sched.comment }} -N 1 +{{ site.sched.comment }} -n 8 + +# Load the computing environment we need +# (mpi4py and numpy are in SciPy-bundle) +module load {{ site.remote.module_python3 }} +module load SciPy-bundle + +# Execute the task +mpiexec amdahl +``` +{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip b/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip new file mode 100644 index 00000000..19804d74 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip @@ -0,0 +1,16 @@ +``` +{{ site.remote.bash_shebang }} +{{ site.sched.comment }} {{ site.sched.flag.name }} parallel-job +{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} +{{ site.sched.comment }} -N 1 +{{ site.sched.comment }} -n 4 + +# Load the computing environment we need +# (mpi4py and numpy are in SciPy-bundle) +module load {{ site.remote.module_python3 }} +module load SciPy-bundle + +# Execute the task +mpiexec amdahl +``` +{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip 
b/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip new file mode 100644 index 00000000..1941ef04 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip @@ -0,0 +1,14 @@ +``` +{{ site.remote.bash_shebang }} +{{ site.sched.comment }} {{ site.sched.flag.name }} solo-job +{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} +{{ site.sched.comment }} -N 1 +{{ site.sched.comment }} -n 1 + +# Load the computing environment we need +module load {{ site.remote.module_python3 }} + +# Execute the task +amdahl +``` +{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip b/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip new file mode 100644 index 00000000..d5a87620 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip @@ -0,0 +1,14 @@ +``` + JobID JobName Partition Account AllocCPUS State ExitCode +------------ ---------- ---------- ---------- ---------- ---------- -------- +7 file.sh cpubase_b+ def-spons+ 1 COMPLETED 0:0 +7.batch batch def-spons+ 1 COMPLETED 0:0 +7.extern extern def-spons+ 1 COMPLETED 0:0 +8 file.sh cpubase_b+ def-spons+ 1 COMPLETED 0:0 +8.batch batch def-spons+ 1 COMPLETED 0:0 +8.extern extern def-spons+ 1 COMPLETED 0:0 +9 example-j+ cpubase_b+ def-spons+ 1 COMPLETED 0:0 +9.batch batch def-spons+ 1 COMPLETED 0:0 +9.extern extern def-spons+ 1 COMPLETED 0:0 +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip b/episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip new file mode 100644 index 00000000..12685735 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip @@ -0,0 +1,19 @@ +``` +top - 21:00:19 up 3:07, 1 user, load average: 1.06, 1.05, 0.96 +Tasks: 311 total, 1 running, 222 sleeping, 0 stopped, 0 zombie +%Cpu(s): 7.2 us, 3.2 sy, 
0.0 ni, 89.0 id, 0.0 wa, 0.2 hi, 0.2 si, 0.0 st +KiB Mem : 16303428 total, 8454704 free, 3194668 used, 4654056 buff/cache +KiB Swap: 8220668 total, 8220668 free, 0 used. 11628168 avail Mem + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND + 1693 jeff 20 0 4270580 346944 171372 S 29.8 2.1 9:31.89 gnome-shell + 3140 jeff 20 0 3142044 928972 389716 S 27.5 5.7 13:30.29 Web Content + 3057 jeff 20 0 3115900 521368 231288 S 18.9 3.2 10:27.71 firefox + 6007 jeff 20 0 813992 112336 75592 S 4.3 0.7 0:28.25 tilix + 1742 jeff 20 0 975080 164508 130624 S 2.0 1.0 3:29.83 Xwayland + 1 root 20 0 230484 11924 7544 S 0.3 0.1 0:06.08 systemd + 68 root 20 0 0 0 0 I 0.3 0.0 0:01.25 kworker/4:1 + 2913 jeff 20 0 965620 47892 37432 S 0.3 0.3 0:11.76 code + 2 root 20 0 0 0 0 S 0.0 0.0 0:00.02 kthreadd +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip b/episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip new file mode 100644 index 00000000..ec4c0d3f --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip @@ -0,0 +1,6 @@ +``` +total used free shared buff/cache available +Mem: 3.8G 1.5G 678M 327M 1.6G 1.6G +Swap: 3.9G 170M 3.7G +``` +{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-script.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-script.Rmd new file mode 100644 index 00000000..5f76fb2c --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-script.Rmd @@ -0,0 +1,3 @@ +```output +Submitted batch job 7 +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd new file mode 100644 index 00000000..8873c7c1 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd @@ -0,0 +1,8 @@ +```output +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 9 
doom example-job luigi R 0:05 1 doo1 +``` + +We can see all the details of our job, most importantly that it is in the `R` +or `RUNNING` state. Sometimes our jobs might need to wait in a queue +(`PENDING`) or have an error (`E`). diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd new file mode 100644 index 00000000..aa8eab85 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd @@ -0,0 +1,4 @@ +```output +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 10 doom hello-world luigi R 0:02 1 doo1 +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd new file mode 100644 index 00000000..9bc993ff --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd @@ -0,0 +1,17 @@ +> option-flags-list.Rmd is constant for all snippet libraries + +* `--ntasks=<ntasks>` or `-n <ntasks>`: How many CPU cores does your job need, + in total? + +* `--time <days-hours:minutes:seconds>` or `-t <days-hours:minutes:seconds>`: + How much real-world time (walltime) will your job take to run? The `<days>` + part can be omitted. + +* `--mem=<megabytes>`: How much memory on a node does your job need in + megabytes? You can also specify gigabytes by adding a little "g" + afterwards (example: `--mem=5g`). + +* `--nodes=<nnodes>` or `-N <nnodes>`: How many separate machines does your job + need to run on? Note that if you set `ntasks` to a number greater than what + one machine can offer, `r config$sched$name` will set this value + automatically.
diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd new file mode 100644 index 00000000..659cefae --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd @@ -0,0 +1,32 @@ +::: challenge + +## Job environment variables + +When ``r config$sched$name`` runs a job, it sets a number of environment +variables for the job. One of these will let us check what directory our job +script was submitted from. The `SLURM_SUBMIT_DIR` variable is set to the +directory from which our job was submitted. Using the `SLURM_SUBMIT_DIR` +variable, modify your job so that it prints out the location from which the +job was submitted. + +:::: solution + +## Solution + +```bash +`r config$remote$prompt` nano example-job.sh +`r config$remote$prompt` cat example-job.sh +``` + +```bash +`r config$remote$bash_shebang` +`r config$sched$comment` -t 00:00:30 + +echo -n "This script is running on " +hostname + +echo "This job was launched in the following directory:" +echo ${SLURM_SUBMIT_DIR} +``` +:::: +::: diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-job.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-job.Rmd new file mode 100644 index 00000000..7dde2ada --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-job.Rmd @@ -0,0 +1,3 @@ +```bash +`r config$remote$prompt` cat slurm-12.out +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd new file mode 100644 index 00000000..fa1b19ea --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd @@ -0,0 +1,5 @@ +```output +This script is running on ... 
+slurmstepd: error: *** JOB 12 ON doo1 CANCELLED AT 2021-02-19T13:55:57 +DUE TO TIME LIMIT *** +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd new file mode 100644 index 00000000..d5a03bfb --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd @@ -0,0 +1,6 @@ +```output +Submitted batch job 13 + +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 13 doom long_job luigi R 0:02 1 doo1 +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-cancel.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-cancel.Rmd new file mode 100644 index 00000000..ccb9cd24 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-cancel.Rmd @@ -0,0 +1,3 @@ +```output +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-multiple-jobs.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-multiple-jobs.Rmd new file mode 100644 index 00000000..5ee30f8d --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-multiple-jobs.Rmd @@ -0,0 +1,29 @@ +::: challenge +## Cancelling multiple jobs + +We can also cancel all of our jobs at once using the `-u` option. This will +delete all jobs for a specific user (in this case, yourself). Note that you +can only delete your own jobs. + +Try submitting multiple jobs and then cancelling them all. 
+ +:::: solution + +## Solution + +First, submit a trio of jobs: + +```bash +`r config$remote$prompt` `r config$sched$submit$name` `r config$sched$submit$options` example-job.sh +`r config$remote$prompt` `r config$sched$submit$name` `r config$sched$submit$options` example-job.sh +`r config$remote$prompt` `r config$sched$submit$name` `r config$sched$submit$options` example-job.sh +``` + +Then, cancel them all: + +```bash +`r config$remote$prompt` `r config$sched$del` -u `r config$remote$user` +``` + +:::: +::: diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd new file mode 100644 index 00000000..80834a8f --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd @@ -0,0 +1,68 @@ +> using-nodes-interactively.Rmd is constant across snippets + +The `r config$sched$interactive` command runs a single command on the cluster +and then exits. Let's demonstrate this by running the `hostname` command with +`r config$sched$interactive`. (We can cancel an `r config$sched$interactive` +job with `Ctrl-c`.) + +```bash +`r config$remote$prompt` `r config$sched$interactive` hostname +``` + +```output +`r config$remote$node` +``` + +`r config$sched$interactive` accepts all of the same options as +`r config$sched$submit$name`. However, instead of specifying these in a script, +these options are specified on the command-line when starting a job. To submit +a job that uses 2 CPUs for instance, we could use the following command: + +```bash +`r config$remote$prompt` `r config$sched$interactive` -n 2 echo "This job will use 2 CPUs." +``` + +```output +This job will use 2 CPUs. +This job will use 2 CPUs. +``` + +Typically, the resulting shell environment will be the same as that for +`r config$sched$submit$name`. + +### Interactive jobs + +Sometimes, you will need a lot of resources for interactive use.
Perhaps it's +our first time running an analysis or we are attempting to debug something that +went wrong with a previous job. Fortunately, `r config$sched$name` makes it +easy to start an interactive job with `r config$sched$interactive`: + +```bash +`r config$remote$prompt` `r config$sched$interactive` --pty bash +``` + +You should be presented with a bash prompt. Note that the prompt will likely +change to reflect your new location, in this case the compute node we are +logged on to. You can also verify this with `hostname`. + +::: callout + +## Creating remote graphics + +To see graphical output inside your jobs, you need to use X11 forwarding. To +connect with this feature enabled, use the `-Y` option when you log in with +the `ssh` command, e.g., `r paste("ssh -Y ", config$remote$user, "@", config$remote$login, sep="")`. + +To demonstrate what happens when you create a graphics window on the remote +node, use the `xeyes` command. A relatively adorable pair of eyes should pop +up (press `Ctrl-C` to stop). If you are using a Mac, you must have installed +XQuartz (and restarted your computer) for this to work. + +If your cluster has the +[slurm-spank-x11](https://github.com/hautreux/slurm-spank-x11) plugin +installed, you can ensure X11 forwarding within interactive jobs by using the +`--x11` option for `r config$sched$interactive` with the command +`r paste(config$sched$interactive, " --x11", " --pty bash", sep="")`. +::: + +When you are done with the interactive job, type `exit` to quit your session. diff --git a/episodes/files/snippets/Ghastly_Mistakes/transferring-files/filezilla-ssh-tunnel-instructions.snip b/episodes/files/snippets/Ghastly_Mistakes/transferring-files/filezilla-ssh-tunnel-instructions.snip new file mode 100644 index 00000000..e69de29b From 5067601226114b8d299b523aa78ee45f11a4370d Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 5 Nov 2025 14:13:19 -0500 Subject: [PATCH 3/7] Snippetized Ep.
15 --- episodes/11-connecting.Rmd | 2 +- episodes/14-environment-variables.Rmd | 2 +- episodes/15-modules.Rmd | 209 ++---------------- .../Ghastly_Mistakes/_config_options.yml | 2 +- .../modules/available-modules.Rmd | 13 ++ .../_config_options.yml | 3 + .../modules/available-modules.Rmd} | 11 +- .../modules/available-modules.snip | 21 -- .../modules/default-modules.snip | 4 - ...missing-python.snip => missing-python.Rmd} | 15 +- .../modules/module-load-python.Rmd | 8 + .../modules/module-load-python.snip | 5 - ...-output.snip => python-executable-dir.Rmd} | 5 +- .../modules/python-executable-dir.snip | 4 - .../modules/python-ls-dir-command.snip | 4 - .../modules/software-dependencies.snip | 44 ++-- episodes/lesson_config.yaml | 53 +---- episodes/load_config.R | 3 +- 18 files changed, 87 insertions(+), 321 deletions(-) create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd rename episodes/files/snippets/{Ghastly_Mistakes/modules/available-modules.snip => HPCC_MagicCastle_slurm/modules/available-modules.Rmd} (81%) delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.snip delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/default-modules.snip rename episodes/files/snippets/HPCC_MagicCastle_slurm/modules/{missing-python.snip => missing-python.Rmd} (82%) create mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.Rmd delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.snip rename episodes/files/snippets/HPCC_MagicCastle_slurm/modules/{python-ls-dir-output.snip => python-executable-dir.Rmd} (85%) delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.snip delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-command.snip diff --git a/episodes/11-connecting.Rmd b/episodes/11-connecting.Rmd index 3ef0dc90..103d0379 100644 --- 
a/episodes/11-connecting.Rmd +++ b/episodes/11-connecting.Rmd @@ -45,7 +45,7 @@ machine sends back. We will use the Secure SHell protocol (or SSH) to open an encrypted network connection between two machines, allowing you to send \& receive text and data without having to worry about prying eyes. -![connect-to-remote.svg](/fig/connect-to-remote.svg){ +![connect-to-remote.svg](fig/connect-to-remote.svg){ max-width="50%" alt="Connect to cluster. " } diff --git a/episodes/14-environment-variables.Rmd b/episodes/14-environment-variables.Rmd index 2c150b5d..3e4c6977 100644 --- a/episodes/14-environment-variables.Rmd +++ b/episodes/14-environment-variables.Rmd @@ -279,7 +279,7 @@ unless we type in the full path to the program, since the directory `/users/vlad` isn't in `PATH`. This means that I can have executables in lots of different places as long as -I remember that I need to to update my `PATH` so that my shell can find them. +I remember that I need to update my `PATH` so that my shell can find them. What if I want to run two different versions of the same program? Since they share the same name, if I add them both to my `PATH` the first one diff --git a/episodes/15-modules.Rmd b/episodes/15-modules.Rmd index 1bb42a48..db1d387e 100644 --- a/episodes/15-modules.Rmd +++ b/episodes/15-modules.Rmd @@ -38,11 +38,11 @@ presence (or absence) of a software package will break others that depend on it. Two well known examples are Python and C compiler versions. Python 3 famously provides a `python` command that conflicts with that provided by Python 2. Software compiled against a newer version of the C libraries and -then run on a machine that has older C libraries installed will result in a -nasty `'GLIBCXX_3.4.20' not found` error. +then run on a machine that has older C libraries installed will result in an +opaque `'GLIBCXX_3.4.20' not found` error. Software versioning is another common issue. 
A team might depend on a certain -package version for their research project - if the software version was to +package version for their research project -- if the software version was to change (for instance, if a package was updated), it might affect their results. Having access to multiple software versions allows a set of researchers to prevent software versioning issues from affecting their results. @@ -50,7 +50,7 @@ prevent software versioning issues from affecting their results. Dependencies are where a particular software package (or even a particular version) depends on having access to another software package (or even a particular version of another software package). For example, the VASP -materials science software may depend on having a particular version of the +materials science software may require a particular version of the FFTW (Fastest Fourier Transform in the West) software library available for it to work. @@ -79,36 +79,14 @@ you are using. To see available software modules, use `module avail`: -```bash -`r config$remote$prompt` module avail -``` - -```output -~~~ /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/modules/all ~~~ - Bazel/3.6.0-GCCcore-x.y.z NSS/3.51-GCCcore-x.y.z - Bison/3.5.3-GCCcore-x.y.z Ninja/1.10.0-GCCcore-x.y.z - Boost/1.72.0-gompi-2020a OSU-Micro-Benchmarks/5.6.3-gompi-2020a - CGAL/4.14.3-gompi-2020a-Python-3.x.y OpenBLAS/0.3.9-GCC-x.y.z - CMake/3.16.4-GCCcore-x.y.z OpenFOAM/v2006-foss-2020a - -[removed most of the output here for clarity] - - Where: - L: Module is loaded - Aliases: Aliases exist: foo/1.2.3 (1.2) means that "module load foo/1.2" - will load foo/1.2.3 - D: Default Module - -Use "module spider" to find all possible modules and extensions. -Use "module keyword key1 key2 ..." to search for all possible modules matching -any of the "keys". 
+```{r, child=paste(snippets, '/modules/available-modules.Rmd', sep=''), eval=TRUE} ``` ### Listing Currently Loaded Modules You can use the `module list` command to see which modules you currently have loaded in your environment. If you have no modules loaded, you will see a -message telling you so +message telling you so. ```bash `r config$remote$prompt` module list @@ -120,57 +98,23 @@ No Modulefiles Currently Loaded. ## Loading and Unloading Software -To load a software module, use `module load`. In this example we will use -Python 3. +To load a software module, use `module load`. -Initially, Python 3 is not loaded. We can test this by using the `which` -command. `which` looks for programs the same way that Bash does, so we can use -it to tell us where a particular piece of software is stored. +In this example we will use Python 3. Initially, it is not loaded. +We can test this by using the `which` command. `which` looks for +programs the same way that Bash does, so we can use it to tell us +where a particular piece of software is stored. ```bash `r config$remote$prompt` which python3 ``` -If the `python3` command was unavailable, we would see output like - -```output -/usr/bin/which: no python3 in (/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/`r config$remote$user`/.local/bin:/home/`r config$remote$user`/bin) -``` - -Note that this wall of text is really a list, with values separated -by the `:` character. 
The output is telling us that the `which` command -searched the following directories for `python3`, without success: - -```output -/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin -/opt/software/slurm/bin -/usr/local/bin -/usr/bin -/usr/local/sbin -/usr/sbin -/opt/puppetlabs/bin -/home/`r config$remote$user`/.local/bin -/home/`r config$remote$user`/bin -``` - -However, in our case we do have an existing `python3` available so we see - -```output -/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin/python3 +```{r, child=paste(snippets, '/modules/missing-python.Rmd', sep=''), eval=TRUE} ``` -We need a different Python than the system provided one though, so let us load -a module to access it. - We can load the `python3` command with `module load`: -```bash -`r config$remote$prompt` module load `r config$remote$module_python3` -`r config$remote$prompt` which python3 -``` - -```output -/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin/python3 +```{r, child=paste(snippets, '/modules/module-load-python.Rmd', sep=''), eval=TRUE} ``` So, what just happened? @@ -193,26 +137,9 @@ variables we can print it out using `echo`. You'll notice a similarity to the output of the `which` command. In this case, there's only one difference: the different directory at the beginning. When we ran the `module load` command, it added a directory to the beginning of our -`$PATH`. Let's examine what's there: - -```bash -`r config$remote$prompt` ls /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin -``` +`$PATH` -- or "prepended to PATH". 
Let's examine what's there: -```output -2to3 nosetests-3.8 python rst2s5.py -2to3-3.8 pasteurize python3 rst2xetex.py -chardetect pbr python3.8 rst2xml.py -cygdb pip python3.8-config rstpep2html.py -cython pip3 python3-config runxlrd.py -cythonize pip3.8 rst2html4.py sphinx-apidoc -easy_install pybabel rst2html5.py sphinx-autogen -easy_install-3.8 __pycache__ rst2html.py sphinx-build -futurize pydoc3 rst2latex.py sphinx-quickstart -idle3 pydoc3.8 rst2man.py tabulate -idle3.8 pygmentize rst2odt_prepstyles.py virtualenv -netaddr pytest rst2odt.py wheel -nosetests py.test rst2pseudoxml.py +```{r, child=paste(snippets, '/modules/python-executable-dir.Rmd', sep=''), eval=TRUE} ``` Taking this to its conclusion, `module load` will add software to your `$PATH`. @@ -220,86 +147,9 @@ It "loads" software. A special note on this - depending on which version of the `module` program that is installed at your site, `module load` will also load required software dependencies. -To demonstrate, let's use `module list`. `module list` shows all loaded -software modules. 
- -```bash -`r config$remote$prompt` module list +```{r, child=paste(snippets, '/modules/software-dependencies.Rmd', sep=''), eval=TRUE} ``` -```output -Currently Loaded Modules: - 1) GCCcore/x.y.z 4) GMP/6.2.0-GCCcore-x.y.z - 2) Tcl/8.6.10-GCCcore-x.y.z 5) libffi/3.3-GCCcore-x.y.z - 3) SQLite/3.31.1-GCCcore-x.y.z 6) Python/3.x.y-GCCcore-x.y.z -``` - -```bash -`r config$remote$prompt` module load GROMACS -`r config$remote$prompt` module list -``` - -```output -Currently Loaded Modules: - 1) GCCcore/x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z - 2) Tcl/8.6.10-GCCcore-x.y.z 15) PMIx/3.1.5-GCCcore-x.y.z - 3) SQLite/3.31.1-GCCcore-x.y.z 16) OpenMPI/4.0.3-GCC-x.y.z - 4) GMP/6.2.0-GCCcore-x.y.z 17) OpenBLAS/0.3.9-GCC-x.y.z - 5) libffi/3.3-GCCcore-x.y.z 18) gompi/2020a - 6) Python/3.x.y-GCCcore-x.y.z 19) FFTW/3.3.8-gompi-2020a - 7) GCC/x.y.z 20) ScaLAPACK/2.1.0-gompi-2020a - 8) numactl/2.0.13-GCCcore-x.y.z 21) foss/2020a - 9) libxml2/2.9.10-GCCcore-x.y.z 22) pybind11/2.4.3-GCCcore-x.y.z-Pytho... - 10) libpciaccess/0.16-GCCcore-x.y.z 23) SciPy-bundle/2020.03-foss-2020a-Py... - 11) hwloc/2.2.0-GCCcore-x.y.z 24) networkx/2.4-foss-2020a-Python-3.8... - 12) libevent/2.1.11-GCCcore-x.y.z 25) GROMACS/2020.1-foss-2020a-Python-3... - 13) UCX/1.8.0-GCCcore-x.y.z -``` - -So in this case, loading the `GROMACS` module (a bioinformatics software -package), also loaded `GMP/6.2.0-GCCcore-x.y.z` and -`SciPy-bundle/2020.03-foss-2020a-Python-3.x.y` as well. Let's try unloading the -`GROMACS` package. 
- -```bash -`r config$remote$prompt` module unload GROMACS -`r config$remote$prompt` module list -``` - -```output -Currently Loaded Modules: - 1) GCCcore/x.y.z 13) UCX/1.8.0-GCCcore-x.y.z - 2) Tcl/8.6.10-GCCcore-x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z - 3) SQLite/3.31.1-GCCcore-x.y.z 15) PMIx/3.1.5-GCCcore-x.y.z - 4) GMP/6.2.0-GCCcore-x.y.z 16) OpenMPI/4.0.3-GCC-x.y.z - 5) libffi/3.3-GCCcore-x.y.z 17) OpenBLAS/0.3.9-GCC-x.y.z - 6) Python/3.x.y-GCCcore-x.y.z 18) gompi/2020a - 7) GCC/x.y.z 19) FFTW/3.3.8-gompi-2020a - 8) numactl/2.0.13-GCCcore-x.y.z 20) ScaLAPACK/2.1.0-gompi-2020a - 9) libxml2/2.9.10-GCCcore-x.y.z 21) foss/2020a - 10) libpciaccess/0.16-GCCcore-x.y.z 22) pybind11/2.4.3-GCCcore-x.y.z-Pytho... - 11) hwloc/2.2.0-GCCcore-x.y.z 23) SciPy-bundle/2020.03-foss-2020a-Py... - 12) libevent/2.1.11-GCCcore-x.y.z 24) networkx/2.4-foss-2020a-Python-3.x.y -``` - -So using `module unload` "un-loads" a module, and depending on how a site is -configured it may also unload all of the dependencies (in our case it does -not). If we wanted to unload everything at once, we could run `module purge` -(unloads everything). - -```bash -`r config$remote$prompt` module purge -`r config$remote$prompt` module list -``` - -```output -No modules loaded -``` - -Note that `module purge` is informative. It will also let us know if a default -set of "sticky" packages cannot be unloaded (and how to actually unload these -if we truly so desired). - Note that this module loading process happens principally through the manipulation of environment variables like `$PATH`. There is usually little or no data transfer involved. @@ -322,31 +172,14 @@ certain version, or version X broke compatibility with a file format you use. In either of these example cases, it helps to be very specific about what software is loaded. -Let's examine the output of `module avail` more closely. 
+Let's examine the output of `module avail` more closely, using the pager since +there may be reams of output: ```bash -`r config$remote$prompt` module avail +`r config$remote$prompt` module avail | less ``` -```output -~~~ /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/modules/all ~~~ - Bazel/3.6.0-GCCcore-x.y.z NSS/3.51-GCCcore-x.y.z - Bison/3.5.3-GCCcore-x.y.z Ninja/1.10.0-GCCcore-x.y.z - Boost/1.72.0-gompi-2020a OSU-Micro-Benchmarks/5.6.3-gompi-2020a - CGAL/4.14.3-gompi-2020a-Python-3.x.y OpenBLAS/0.3.9-GCC-x.y.z - CMake/3.16.4-GCCcore-x.y.z OpenFOAM/v2006-foss-2020a - -[removed most of the output here for clarity] - - Where: - L: Module is loaded - Aliases: Aliases exist: foo/1.2.3 (1.2) means that "module load foo/1.2" - will load foo/1.2.3 - D: Default Module - -Use "module spider" to find all possible modules and extensions. -Use "module keyword key1 key2 ..." to search for all possible modules matching -any of the "keys". +```{r, child=paste(snippets, '/modules/available-modules.Rmd', sep=''), eval=TRUE} ``` ::::::::::::::::::::::::::::::::::::::: challenge @@ -373,7 +206,7 @@ compute node). `r if (!is.null(config$sched$flag$qos)) {print(paste(config$sched$comment,config$sched$flag$qos))} `r config$sched$comment` `r config$sched$flag$time` 00:00:30 -module load `r config$remote$module_python3` +module load `r config$remote$modules$python` python3 --version ``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml b/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml index 76eb0075..54b76da6 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml +++ b/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml @@ -9,7 +9,7 @@ # Compute irresponsibly. 
--- -snippets: "/snippets_library/Ghastly_Mistakes" +snippets: "Ghastly_Mistakes" local: prompt: "luigi@mushroomkingdom:~$" diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd b/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd new file mode 100644 index 00000000..e7fc4eb2 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd @@ -0,0 +1,13 @@ +```output +~~~ /usr/local/modules ~~~ + amdahl/0.0.1 (D) python/2.7.1 (O) + + Where: + L: Module is loaded + D: Default Module + O: Module is obsolete + +Use "module spider" to find all possible modules and extensions. +Use "module keyword key1 key2 ..." to search for all possible modules matching +any of the "keys". +``` \ No newline at end of file diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml b/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml index 45f7a367..1995b6cb 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml @@ -29,6 +29,9 @@ remote: module_python3: "Python" prompt: "[yourUsername@login1 ~]$" bash_shebang: "#!/bin/bash" + modules: + python: "Python" + sched: name: "Slurm" diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd similarity index 81% rename from episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip rename to episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd index f6f3f50b..2c48235a 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.snip +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd @@ -1,4 +1,8 @@ +```bash +`r config$remote$prompt` module avail ``` + +```output ~~~ /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/modules/all ~~~ 
Bazel/3.6.0-GCCcore-x.y.z NSS/3.51-GCCcore-x.y.z Bison/3.5.3-GCCcore-x.y.z Ninja/1.10.0-GCCcore-x.y.z @@ -10,12 +14,11 @@ Where: L: Module is loaded - Aliases: Aliases exist: foo/1.2.3 (1.2) means that "module load foo/1.2" - will load foo/1.2.3 D: Default Module + Aliases exist: foo/1.2.3 (1.2) means that + "module load foo/1.2" will load foo/1.2.3 Use "module spider" to find all possible modules and extensions. Use "module keyword key1 key2 ..." to search for all possible modules matching any of the "keys". -``` -{: .output} +``` \ No newline at end of file diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.snip deleted file mode 100644 index f6f3f50b..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.snip +++ /dev/null @@ -1,21 +0,0 @@ -``` -~~~ /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/modules/all ~~~ - Bazel/3.6.0-GCCcore-x.y.z NSS/3.51-GCCcore-x.y.z - Bison/3.5.3-GCCcore-x.y.z Ninja/1.10.0-GCCcore-x.y.z - Boost/1.72.0-gompi-2020a OSU-Micro-Benchmarks/5.6.3-gompi-2020a - CGAL/4.14.3-gompi-2020a-Python-3.x.y OpenBLAS/0.3.9-GCC-x.y.z - CMake/3.16.4-GCCcore-x.y.z OpenFOAM/v2006-foss-2020a - -[removed most of the output here for clarity] - - Where: - L: Module is loaded - Aliases: Aliases exist: foo/1.2.3 (1.2) means that "module load foo/1.2" - will load foo/1.2.3 - D: Default Module - -Use "module spider" to find all possible modules and extensions. -Use "module keyword key1 key2 ..." to search for all possible modules matching -any of the "keys". 
-``` -{: .output} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/default-modules.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/default-modules.snip deleted file mode 100644 index a448dd96..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/default-modules.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -No Modulefiles Currently Loaded. -``` -{: .output} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/missing-python.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/missing-python.Rmd similarity index 82% rename from episodes/files/snippets/HPCC_MagicCastle_slurm/modules/missing-python.snip rename to episodes/files/snippets/HPCC_MagicCastle_slurm/modules/missing-python.Rmd index 89039d32..f06ed919 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/missing-python.snip +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/missing-python.Rmd @@ -1,15 +1,14 @@ If the `python3` command was unavailable, we would see output like +```output +/usr/bin/which: no python3 in (/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/`r config$remote$user`/.local/bin:/home/`r config$remote$user`/bin) ``` -/usr/bin/which: no python3 in (/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/{{site.remote.user}}/.local/bin:/home/{{site.remote.user}}/bin) -``` -{: .output} Note that this wall of text is really a list, with values separated by the `:` character. 
The output is telling us that the `which` command searched the following directories for `python3`, without success: -``` +```output /cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin /opt/software/slurm/bin /usr/local/bin @@ -17,17 +16,15 @@ searched the following directories for `python3`, without success: /usr/local/sbin /usr/sbin /opt/puppetlabs/bin -/home/{{site.remote.user}}/.local/bin -/home/{{site.remote.user}}/bin +/home/`r config$remote$user`/.local/bin +/home/`r config$remote$user`/bin ``` -{: .output} However, in our case we do have an existing `python3` available so we see -``` +```output /cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin/python3 ``` -{: .output} We need a different Python than the system provided one though, so let us load a module to access it. diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.Rmd b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.Rmd new file mode 100644 index 00000000..81d3be34 --- /dev/null +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.Rmd @@ -0,0 +1,8 @@ +```bash +`r config$remote$prompt` module load `r config$remote$modules$python` +`r config$remote$prompt` which python3 +``` + +```output +/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin/python3 +``` \ No newline at end of file diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.snip deleted file mode 100644 index d9bab7b4..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/module-load-python.snip +++ /dev/null @@ -1,5 +0,0 @@ -``` -{{ site.remote.prompt }} module load {{ site.remote.module_python3 }} -{{ site.remote.prompt }} which python3 -``` -{: .language-bash} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-output.snip 
b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.Rmd similarity index 85% rename from episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-output.snip rename to episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.Rmd index 01d010ba..13031bf9 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-output.snip +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.Rmd @@ -1,4 +1,8 @@ +```bash +`r config$remote$prompt` ls /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin ``` + +```output 2to3 nosetests-3.8 python rst2s5.py 2to3-3.8 pasteurize python3 rst2xetex.py chardetect pbr python3.8 rst2xml.py @@ -13,4 +17,3 @@ idle3.8 pygmentize rst2odt_prepstyles.py virtualenv netaddr pytest rst2odt.py wheel nosetests py.test rst2pseudoxml.py ``` -{: .output} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.snip deleted file mode 100644 index 46dec092..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-executable-dir.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin/python3 -``` -{: .output} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-command.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-command.snip deleted file mode 100644 index 80319d0a..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-ls-dir-command.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -{{ site.remote.prompt }} ls /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin -``` -{: .language-bash} diff --git 
a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.snip index fe107f2e..01234436 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.snip +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.snip @@ -1,26 +1,23 @@ To demonstrate, let's use `module list`. `module list` shows all loaded software modules. +```bash +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output Currently Loaded Modules: 1) GCCcore/x.y.z 4) GMP/6.2.0-GCCcore-x.y.z 2) Tcl/8.6.10-GCCcore-x.y.z 5) libffi/3.3-GCCcore-x.y.z 3) SQLite/3.31.1-GCCcore-x.y.z 6) Python/3.x.y-GCCcore-x.y.z ``` -{: .output} +```bash +`r config$remote$prompt` module load GROMACS +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module load GROMACS -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output Currently Loaded Modules: 1) GCCcore/x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z 2) Tcl/8.6.10-GCCcore-x.y.z 15) PMIx/3.1.5-GCCcore-x.y.z @@ -36,20 +33,18 @@ Currently Loaded Modules: 12) libevent/2.1.11-GCCcore-x.y.z 25) GROMACS/2020.1-foss-2020a-Python-3... 13) UCX/1.8.0-GCCcore-x.y.z ``` -{: .output} So in this case, loading the `GROMACS` module (a bioinformatics software package), also loaded `GMP/6.2.0-GCCcore-x.y.z` and `SciPy-bundle/2020.03-foss-2020a-Python-3.x.y` as well. Let's try unloading the `GROMACS` package. 
+```bash +`r config$remote$prompt` module unload GROMACS +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module unload GROMACS -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output Currently Loaded Modules: 1) GCCcore/x.y.z 13) UCX/1.8.0-GCCcore-x.y.z 2) Tcl/8.6.10-GCCcore-x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z @@ -64,23 +59,20 @@ Currently Loaded Modules: 11) hwloc/2.2.0-GCCcore-x.y.z 23) SciPy-bundle/2020.03-foss-2020a-Py... 12) libevent/2.1.11-GCCcore-x.y.z 24) networkx/2.4-foss-2020a-Python-3.x.y ``` -{: .output} So using `module unload` "un-loads" a module, and depending on how a site is - configured it may also unload all of the dependencies (in our case it does - not). If we wanted to unload everything at once, we could run `module purge` - (unloads everything). +configured it may also unload all of the dependencies (in our case it does +not). If we wanted to unload everything at once, we could run `module purge` +(unloads everything). +```bash +`r config$remote$prompt` module purge +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module purge -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output No modules loaded ``` -{: .output} Note that `module purge` is informative. 
It will also let us know if a default set of "sticky" packages cannot be unloaded (and how to actually unload these diff --git a/episodes/lesson_config.yaml b/episodes/lesson_config.yaml index 52f0a9d7..39ae49a6 100644 --- a/episodes/lesson_config.yaml +++ b/episodes/lesson_config.yaml @@ -1,52 +1,3 @@ --- -local: - prompt: "[you@laptop:~]$" - bash_shebang: "#!/usr/bin/env bash" - -snippets: HPCC_MagicCastle_slurm -baseurl: "https://ocaisa.github.io/probable-pancake/" - -remote: - name: "HPC Carpentry's Cloud Cluster" - host_id: #"EPCC_ARCHER2" - login: "cluster.hpc-carpentry.org" - host: "login1" - node: "smnode1" - location: "cluster.hpc-carpentry.org" - homedir: "/home" - user: "yourUsername" - prompt: '[yourUsername@login1 ~]' - prompt_work: #"userid@uan01:/work/ta114/ta114/userid>" - module_python3: "Python" - bash_shebang: "#!/bin/bash" - -sched: - name: "Slurm" - submit: - name: "sbatch" - options: "" - queue: - debug: "smnode" - testing: "cpubase_bycore_b1" - status: "squeue" - flag: - user: "-u yourUsername" - interactive: "" - histdetail: "-l -j" - name: "-J" - time: "-t" - queue: "-p" - nodes: "" - tasks: "" - del: "scancel" - interactive: "srun" - info: "sinfo" - comment: "#SBATCH" - hist: "sacct -u yourUsername" - hist_filter: "" - partition: "standard" - reservation: "shortqos" - qos: "short" - budget: "short" - project: "" -main_config: "lesson_config_hpcc.yaml" +main_config: files/snippets/Ghastly_Mistakes/_config_options.yml +snippets: Ghastly_Mistakes \ No newline at end of file diff --git a/episodes/load_config.R b/episodes/load_config.R index b00ec7a1..dce81aa6 100644 --- a/episodes/load_config.R +++ b/episodes/load_config.R @@ -6,7 +6,8 @@ library(yaml) ## Load primary configuration config <- yaml.load_file("lesson_config.yaml") -## If "config" key exists, load the second configuration and merge +## If "main_config" key exists, load the second configuration and merge +print(paste("Loading ", config$main_config)) if (!is.null(config$main_config) 
&& file.exists(config$main_config)) { override_config <- yaml.load_file(config$main_config) config <- modifyList(config, override_config) From 8b9e0e004c53ae0e81cf82183e134af5cedc2f86 Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 5 Nov 2025 14:35:26 -0500 Subject: [PATCH 4/7] Snippetized Ep. 17 --- episodes/17-parallel.Rmd | 140 +----------------- .../_config_options.yml | 4 +- .../parallel/eight-tasks-jobscript.snip | 16 -- .../parallel/eight-tasks.Rmd | 58 ++++++++ .../parallel/four-tasks-jobscript.snip | 16 -- .../parallel/four-tasks.Rmd | 56 +++++++ .../parallel/one-task-jobscript.snip | 14 -- .../parallel/one-task.Rmd | 22 +++ .../filezilla-ssh-tunnel-instructions.snip | 0 episodes/slurm_defaults.yaml | 3 +- 10 files changed, 144 insertions(+), 185 deletions(-) delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks-jobscript.snip create mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks.Rmd delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks-jobscript.snip create mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks.Rmd delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task-jobscript.snip create mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task.Rmd delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/transferring-files/filezilla-ssh-tunnel-instructions.snip diff --git a/episodes/17-parallel.Rmd b/episodes/17-parallel.Rmd index 9f05f34c..f12ebe49 100644 --- a/episodes/17-parallel.Rmd +++ b/episodes/17-parallel.Rmd @@ -167,27 +167,7 @@ tell us the important flags we might want to use when launching it. Create a submission file, requesting one task on a single node, then launch it. 
-```bash -`r config$remote$prompt` nano serial-job.sh -`r config$remote$prompt` cat serial-job.sh -``` - -```bash -`r config$remote$bash_shebang` -`r config$sched$comment` `r config$sched$flag$name` solo-job -`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` -`r config$sched$comment` -N 1 -`r config$sched$comment` -n 1 - -# Load the computing environment we need -module load `r config$remote$module_python3` - -# Execute the task -amdahl -``` - -```bash -`r config$remote$prompt` `r config$sched$submit$name` serial-job.sh +```{r, child=paste(snippets, '/parallel/one-task.Rmd', sep=''), eval=TRUE} ``` As before, use the `r config$sched$name` status commands to check whether your job @@ -244,7 +224,7 @@ for 25.5 seconds, and no time was saved. The cluster can do better, if we ask. ## Running the Parallel Job The `amdahl` program uses the Message Passing Interface (MPI) for parallelism -\-- this is a common tool on HPC systems. +-- this is a common tool on HPC systems. ::::::::::::::::::::::::::::::::::::::::: callout @@ -287,61 +267,7 @@ by examining the environment variables set when the job is launched. Let's modify the job script to request more cores and use the MPI run-time. -```bash -`r config$remote$prompt` cp serial-job.sh parallel-job.sh -`r config$remote$prompt` nano parallel-job.sh -`r config$remote$prompt` cat parallel-job.sh -``` - -```bash -`r config$remote$bash_shebang` -`r config$sched$comment` `r config$sched$flag$name` parallel-job -`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` -`r config$sched$comment` -N 1 -`r config$sched$comment` -n 4 - -# Load the computing environment we need -# (mpi4py and numpy are in SciPy-bundle) -module load `r config$remote$module_python3` -module load SciPy-bundle - -# Execute the task -mpiexec amdahl -``` - -Then submit your job. 
Note that the submission command has not really changed -from how we submitted the serial job: all the parallel settings are in the -batch file rather than the command line. - -```bash -`r config$remote$prompt` `r config$sched$submit$name` parallel-job.sh -``` - -As before, use the status commands to check when your job runs. - -```bash -`r config$remote$prompt` ls -t -``` - -```output -slurm-347178.out parallel-job.sh slurm-347087.out serial-job.sh amdahl README.md LICENSE.txt -``` - -```bash -`r config$remote$prompt` cat slurm-347178.out -``` - -```output -Doing 30.000 seconds of 'work' on 4 processors, -which should take 10.875 seconds with 0.850 parallel proportion of the workload. - - Hello, World! I am process 0 of 4 on `r config$remote$node`. I will do all the serial 'work' for 4.500 seconds. - Hello, World! I am process 2 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. - Hello, World! I am process 1 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. - Hello, World! I am process 3 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. - Hello, World! I am process 0 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. - -Total execution time (according to rank 0): 10.888 seconds +```{r, child=paste(snippets, '/parallel/four-tasks.Rmd', sep=''), eval=TRUE} ``` ::::::::::::::::::::::::::::::::::::::: challenge @@ -373,8 +299,6 @@ This is the basic principle behind [Amdahl's Law][amdahl], which is one way of predicting improvements in execution time for a **fixed** workload that can be subdivided and run in parallel to some extent. - - ::::::::::::::::::::::::: :::::::::::::::::::::::::::::::::::::::::::::::::: @@ -406,63 +330,7 @@ versus the number of CPUs *n* would give a straight line, *S* = *n*. Let's run one more job, so we can see how close to a straight line our `amdahl` code gets. 
-```bash -`r config$remote$prompt` nano parallel-job.sh -`r config$remote$prompt` cat parallel-job.sh -``` - -```bash -`r config$remote$bash_shebang` -`r config$sched$comment` `r config$sched$flag$name` parallel-job -`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` -`r config$sched$comment` -N 1 -`r config$sched$comment` -n 8 - -# Load the computing environment we need -# (mpi4py and numpy are in SciPy-bundle) -module load `r config$remote$module_python3` -module load SciPy-bundle - -# Execute the task -mpiexec amdahl -``` - -Then submit your job. Note that the submission command has not really changed -from how we submitted the serial job: all the parallel settings are in the -batch file rather than the command line. - -```bash -`r config$remote$prompt` `r config$sched$submit$name` parallel-job.sh -``` - -As before, use the status commands to check when your job runs. - -```bash -`r config$remote$prompt` ls -t -``` - -```output -slurm-347271.out parallel-job.sh slurm-347178.out slurm-347087.out serial-job.sh amdahl README.md LICENSE.txt -``` - -```bash -`r config$remote$prompt` cat slurm-347178.out -``` - -```output -which should take 7.688 seconds with 0.850 parallel proportion of the workload. - - Hello, World! I am process 4 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 0 of 8 on `r config$remote$node`. I will do all the serial 'work' for 4.500 seconds. - Hello, World! I am process 2 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 1 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 3 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 5 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 6 of 8 on `r config$remote$node`. 
I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 7 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - Hello, World! I am process 0 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. - -Total execution time (according to rank 0): 7.697 seconds +```{r, child=paste(snippets, '/parallel/eight-tasks.Rmd', sep=''), eval=TRUE} ``` :::::::::::::::::::::::::::::::::::::: discussion diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml b/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml index 1995b6cb..71c8b924 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml @@ -15,7 +15,7 @@ snippets: "/snippets_library/HPCC_MagicCastle_slurm" local: prompt: "[you@laptop:~]$" - bash_shebang: "#!/usr/bin/env bash" + shebang: "#!/bin/bash" remote: name: "HPC Carpentry's Cloud Cluster" @@ -28,7 +28,7 @@ remote: user: "yourUsername" module_python3: "Python" prompt: "[yourUsername@login1 ~]$" - bash_shebang: "#!/bin/bash" + shebang: "#!/bin/bash" modules: python: "Python" diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks-jobscript.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks-jobscript.snip deleted file mode 100644 index 2f643071..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks-jobscript.snip +++ /dev/null @@ -1,16 +0,0 @@ -``` -{{ site.remote.bash_shebang }} -{{ site.sched.comment }} {{ site.sched.flag.name }} parallel-job -{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} -{{ site.sched.comment }} -N 1 -{{ site.sched.comment }} -n 8 - -# Load the computing environment we need -# (mpi4py and numpy are in SciPy-bundle) -module load {{ site.remote.module_python3 }} -module load SciPy-bundle - -# Execute the task -mpiexec amdahl -``` -{: 
.language-bash} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks.Rmd b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks.Rmd new file mode 100644 index 00000000..dcbe379a --- /dev/null +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/eight-tasks.Rmd @@ -0,0 +1,58 @@ +```bash +`r config$remote$prompt` nano parallel-job.sh +`r config$remote$prompt` cat parallel-job.sh +``` + +```bash +`r config$remote$shebang` +`r config$sched$comment` `r config$sched$flag$name` parallel-job +`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` +`r config$sched$comment` -N 1 +`r config$sched$comment` -n 8 + +# Load the computing environment we need +# (mpi4py and numpy are in SciPy-bundle) +module load `r config$remote$modules$python` +module load SciPy-bundle + +# Execute the task +mpiexec amdahl +``` + +Then submit your job. Note that the submission command has not really changed +from how we submitted the serial job: all the parallel settings are in the +batch file rather than the command line. + +```bash +`r config$remote$prompt` `r config$sched$submit$name` parallel-job.sh +``` + +As before, use the status commands to check when your job runs. + +```bash +`r config$remote$prompt` ls -t +``` + +```output +slurm-347271.out parallel-job.sh slurm-347178.out slurm-347087.out serial-job.sh amdahl README.md LICENSE.txt +``` + +```bash +`r config$remote$prompt` cat slurm-347271.out +``` + +```output +which should take 7.688 seconds with 0.850 parallel proportion of the workload. + + Hello, World! I am process 4 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 0 of 8 on `r config$remote$node`. I will do all the serial 'work' for 4.500 seconds. + Hello, World! I am process 2 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 1 of 8 on `r config$remote$node`.
I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 3 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 5 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 6 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 7 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 0 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + +Total execution time (according to rank 0): 7.697 seconds +``` diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks-jobscript.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks-jobscript.snip deleted file mode 100644 index 19804d74..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks-jobscript.snip +++ /dev/null @@ -1,16 +0,0 @@ -``` -{{ site.remote.bash_shebang }} -{{ site.sched.comment }} {{ site.sched.flag.name }} parallel-job -{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} -{{ site.sched.comment }} -N 1 -{{ site.sched.comment }} -n 4 - -# Load the computing environment we need -# (mpi4py and numpy are in SciPy-bundle) -module load {{ site.remote.module_python3 }} -module load SciPy-bundle - -# Execute the task -mpiexec amdahl -``` -{: .language-bash} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks.Rmd b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks.Rmd new file mode 100644 index 00000000..2e885505 --- /dev/null +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/four-tasks.Rmd @@ -0,0 +1,56 @@ +```bash +`r config$remote$prompt` cp serial-job.sh parallel-job.sh +`r config$remote$prompt` nano parallel-job.sh +`r config$remote$prompt` cat parallel-job.sh +``` + +```bash +`r config$remote$shebang` +`r 
config$sched$comment` `r config$sched$flag$name` parallel-job +`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` +`r config$sched$comment` -N 1 +`r config$sched$comment` -n 4 + +# Load the computing environment we need +# (mpi4py and numpy are in SciPy-bundle) +module load `r config$remote$modules$python` +module load SciPy-bundle + +# Execute the task +mpiexec amdahl +``` + +Then submit your job. Note that the submission command has not really changed +from how we submitted the serial job: all the parallel settings are in the +batch file rather than the command line. + +```bash +`r config$remote$prompt` `r config$sched$submit$name` parallel-job.sh +``` + +As before, use the status commands to check when your job runs. + +```bash +`r config$remote$prompt` ls -t +``` + +```output +slurm-347178.out parallel-job.sh slurm-347087.out serial-job.sh amdahl README.md LICENSE.txt +``` + +```bash +`r config$remote$prompt` cat slurm-347178.out +``` + +```output +Doing 30.000 seconds of 'work' on 4 processors, +which should take 10.875 seconds with 0.850 parallel proportion of the workload. + + Hello, World! I am process 0 of 4 on `r config$remote$node`. I will do all the serial 'work' for 4.500 seconds. + Hello, World! I am process 2 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. + Hello, World! I am process 1 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. + Hello, World! I am process 3 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. + Hello, World! I am process 0 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. 
+ +Total execution time (according to rank 0): 10.888 seconds +``` diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task-jobscript.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task-jobscript.snip deleted file mode 100644 index 1941ef04..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task-jobscript.snip +++ /dev/null @@ -1,14 +0,0 @@ -``` -{{ site.remote.bash_shebang }} -{{ site.sched.comment }} {{ site.sched.flag.name }} solo-job -{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} -{{ site.sched.comment }} -N 1 -{{ site.sched.comment }} -n 1 - -# Load the computing environment we need -module load {{ site.remote.module_python3 }} - -# Execute the task -amdahl -``` -{: .language-bash} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task.Rmd b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task.Rmd new file mode 100644 index 00000000..ceefdfdb --- /dev/null +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/parallel/one-task.Rmd @@ -0,0 +1,22 @@ +```bash +`r config$remote$prompt` nano serial-job.sh +`r config$remote$prompt` cat serial-job.sh +``` + +```bash +`r config$remote$shebang` +`r config$sched$comment` `r config$sched$flag$name` solo-job +`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` +`r config$sched$comment` -N 1 +`r config$sched$comment` -n 1 + +# Load the computing environment we need +module load `r config$remote$modules$python` + +# Execute the task +amdahl +``` + +```bash +`r config$remote$prompt` `r config$sched$submit$name` serial-job.sh +``` diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/transferring-files/filezilla-ssh-tunnel-instructions.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/transferring-files/filezilla-ssh-tunnel-instructions.snip deleted file mode 100644 index e69de29b..00000000 diff --git a/episodes/slurm_defaults.yaml 
b/episodes/slurm_defaults.yaml index fc550217..6372be9c 100644 --- a/episodes/slurm_defaults.yaml +++ b/episodes/slurm_defaults.yaml @@ -20,7 +20,8 @@ remote: user: "userid" # stand-in for the username prompt: "[userid@head:~]" # command-line prompt prompt_work: "[userid@head:/work/userid]" # prompt under /work - module_python3: "Python" # name of the module providing py3 + modules: + python: "Python" # name of the module providing Python v3 shebang: "#!/bin/bash" # first line of every shell script # Commands & flags for the scheduler environment From 2c924f8f419a14c2e41b4f9c34530e4089683149 Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 5 Nov 2025 14:41:42 -0500 Subject: [PATCH 5/7] Snippetized Ep. 18 --- episodes/18-resources.Rmd | 13 +------------ ...count-history.snip => account-history.Rmd} | 5 ++--- .../resources/monitor-processes-top.snip | 19 ------------------- .../resources/system-memory-free.snip | 6 ------ 4 files changed, 3 insertions(+), 40 deletions(-) rename episodes/files/snippets/HPCC_MagicCastle_slurm/resources/{account-history.snip => account-history.Rmd} (97%) delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/resources/monitor-processes-top.snip delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/resources/system-memory-free.snip diff --git a/episodes/18-resources.Rmd b/episodes/18-resources.Rmd index 8fe70605..d95d8b53 100644 --- a/episodes/18-resources.Rmd +++ b/episodes/18-resources.Rmd @@ -69,18 +69,7 @@ use ``r config$sched$hist`` to get statistics about `parallel-job.sh`. 
`r config$remote$prompt` `r config$sched$hist` ``` -```output - JobID JobName Partition Account AllocCPUS State ExitCode ------------- ---------- ---------- ---------- ---------- ---------- -------- -7 file.sh cpubase_b+ def-spons+ 1 COMPLETED 0:0 -7.batch batch def-spons+ 1 COMPLETED 0:0 -7.extern extern def-spons+ 1 COMPLETED 0:0 -8 file.sh cpubase_b+ def-spons+ 1 COMPLETED 0:0 -8.batch batch def-spons+ 1 COMPLETED 0:0 -8.extern extern def-spons+ 1 COMPLETED 0:0 -9 example-j+ cpubase_b+ def-spons+ 1 COMPLETED 0:0 -9.batch batch def-spons+ 1 COMPLETED 0:0 -9.extern extern def-spons+ 1 COMPLETED 0:0 +```{r, child=paste(snippets, '/resources/account-history.Rmd', sep=''), eval=TRUE} ``` This shows all the jobs we ran today (note that there are multiple entries per diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/account-history.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/account-history.Rmd similarity index 97% rename from episodes/files/snippets/HPCC_MagicCastle_slurm/resources/account-history.snip rename to episodes/files/snippets/HPCC_MagicCastle_slurm/resources/account-history.Rmd index d5a87620..5e8a53f2 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/account-history.snip +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/account-history.Rmd @@ -1,4 +1,4 @@ -``` +```output JobID JobName Partition Account AllocCPUS State ExitCode ------------ ---------- ---------- ---------- ---------- ---------- -------- 7 file.sh cpubase_b+ def-spons+ 1 COMPLETED 0:0 @@ -10,5 +10,4 @@ 9 example-j+ cpubase_b+ def-spons+ 1 COMPLETED 0:0 9.batch batch def-spons+ 1 COMPLETED 0:0 9.extern extern def-spons+ 1 COMPLETED 0:0 -``` -{: .output} +``` \ No newline at end of file diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/monitor-processes-top.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/monitor-processes-top.snip deleted file mode 100644 index 12685735..00000000 --- 
a/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/monitor-processes-top.snip +++ /dev/null @@ -1,19 +0,0 @@ -``` -top - 21:00:19 up 3:07, 1 user, load average: 1.06, 1.05, 0.96 -Tasks: 311 total, 1 running, 222 sleeping, 0 stopped, 0 zombie -%Cpu(s): 7.2 us, 3.2 sy, 0.0 ni, 89.0 id, 0.0 wa, 0.2 hi, 0.2 si, 0.0 st -KiB Mem : 16303428 total, 8454704 free, 3194668 used, 4654056 buff/cache -KiB Swap: 8220668 total, 8220668 free, 0 used. 11628168 avail Mem - - PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND - 1693 jeff 20 0 4270580 346944 171372 S 29.8 2.1 9:31.89 gnome-shell - 3140 jeff 20 0 3142044 928972 389716 S 27.5 5.7 13:30.29 Web Content - 3057 jeff 20 0 3115900 521368 231288 S 18.9 3.2 10:27.71 firefox - 6007 jeff 20 0 813992 112336 75592 S 4.3 0.7 0:28.25 tilix - 1742 jeff 20 0 975080 164508 130624 S 2.0 1.0 3:29.83 Xwayland - 1 root 20 0 230484 11924 7544 S 0.3 0.1 0:06.08 systemd - 68 root 20 0 0 0 0 I 0.3 0.0 0:01.25 kworker/4:1 - 2913 jeff 20 0 965620 47892 37432 S 0.3 0.3 0:11.76 code - 2 root 20 0 0 0 0 S 0.0 0.0 0:00.02 kthreadd -``` -{: .output} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/system-memory-free.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/system-memory-free.snip deleted file mode 100644 index ec4c0d3f..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/resources/system-memory-free.snip +++ /dev/null @@ -1,6 +0,0 @@ -``` -total used free shared buff/cache available -Mem: 3.8G 1.5G 678M 327M 1.6G 1.6G -Swap: 3.9G 170M 3.7G -``` -{: .output} From f91826fb8dd3614fc6b78871d1a661df2dc68cdf Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 5 Nov 2025 15:45:33 -0500 Subject: [PATCH 6/7] Ghastly Mistakes appears to work --- episodes/10-hpc-intro.Rmd | 5 ++ episodes/11-connecting.Rmd | 10 +-- episodes/15-modules.Rmd | 11 ++- episodes/18-resources.Rmd | 4 +- .../EPCC_ARCHER2_slurm/_config_options.yml | 63 ------------- .../EPCC_ARCHER2_slurm/cluster/queue-info.Rmd | 8 -- 
.../cluster/specific-node-info.Rmd | 11 --- .../EPCC_ARCHER2_slurm/login_output.Rmd | 26 ------ .../modules/available-modules.Rmd | 26 ------ .../modules/missing-python.snip | 1 - .../modules/module-load-python.snip | 2 - .../modules/module_list.snip | 16 ---- .../modules/python-executable-dir.snip | 1 - .../modules/python-ls-dir-command.snip | 1 - .../modules/python-ls-dir-output.snip | 2 - .../modules/python-module-path.snip | 1 - .../modules/software-dependencies.snip | 80 ----------------- .../modules/wrong-gcc-version.snip | 73 --------------- .../resources/job-detail.Rmd | 2 - .../resources/perf-exercise.Rmd | 67 -------------- .../resources/pi-mpi-details.Rmd | 16 ---- .../resources/runtime-exercise.Rmd | 47 ---------- .../resources/serial-submit.Rmd | 25 ------ .../resources/sharpen-details.snip | 18 ---- .../resources/view-output.snip | 12 --- .../scheduler/basic-job-script.Rmd | 3 - .../scheduler/basic-job-status.Rmd | 8 -- .../EPCC_ARCHER2_slurm/scheduler/del_job.Rmd | 3 - .../scheduler/filesystem_issues.Rmd | 23 ----- .../scheduler/job-with-name-status.Rmd | 4 - .../EPCC_ARCHER2_slurm/scheduler/long_job.Rmd | 11 --- .../scheduler/ls-pgm-output.Rmd | 4 - .../scheduler/option-flags-list.Rmd | 5 -- .../scheduler/parallel-challenge.Rmd | 27 ------ .../scheduler/parallel-challenge2.Rmd | 29 ------ .../scheduler/parallel-launch-desc.Rmd | 15 ---- .../scheduler/parallel-script.Rmd | 15 ---- .../scheduler/print-sched-variables.Rmd | 12 --- .../scheduler/runtime-exceeded-job.Rmd | 3 - .../scheduler/runtime-exceeded-output.Rmd | 5 -- .../scheduler/squeue_pending.Rmd | 4 - .../scheduler/squeue_running.Rmd | 2 - .../scheduler/terminate-job-begin.Rmd | 6 -- .../scheduler/terminate-job-cancel.Rmd | 3 - .../scheduler/terminate-multiple-jobs.Rmd | 9 -- .../scheduler/using-nodes-interactively.Rmd | 33 ------- .../Ghastly_Mistakes/_config_options.yml | 3 +- .../Ghastly_Mistakes/cluster/queue-info.Rmd | 5 +- .../cluster/specific-node-info.Rmd | 2 +- 
.../modules/available-modules.Rmd | 17 +++- .../modules/default-modules.snip | 4 - ...missing-python.snip => missing-python.Rmd} | 15 ++-- .../modules/module-load-python.Rmd | 8 ++ .../modules/module-load-python.snip | 5 -- ...-output.snip => python-executable-dir.Rmd} | 5 +- .../modules/python-executable-dir.snip | 4 - .../modules/python-ls-dir-command.snip | 4 - .../modules/python-module-path.snip | 4 - .../modules/software-dependencies.Rmd} | 0 .../modules/wrong-gcc-version.snip | 5 -- .../parallel/eight-tasks-jobscript.snip | 16 ---- .../Ghastly_Mistakes/parallel/eight-tasks.Rmd | 58 ++++++++++++ .../parallel/four-tasks-jobscript.snip | 16 ---- .../Ghastly_Mistakes/parallel/four-tasks.Rmd | 56 ++++++++++++ .../parallel/one-task-jobscript.snip | 14 --- .../Ghastly_Mistakes/parallel/one-task.Rmd | 22 +++++ ...count-history.snip => account-history.Rmd} | 5 +- .../resources/monitor-processes-top.snip | 19 ---- .../resources/system-memory-free.snip | 6 -- .../scheduler/basic-job-status.Rmd | 4 +- .../scheduler/job-with-name-status.Rmd | 4 +- .../scheduler/option-flags-list.Rmd | 2 - .../scheduler/print-sched-variables.Rmd | 10 +-- .../scheduler/runtime-exceeded-output.Rmd | 2 +- .../scheduler/terminate-job-begin.Rmd | 4 +- .../scheduler/using-nodes-interactively.Rmd | 8 +- .../filezilla-ssh-tunnel-instructions.snip | 0 .../modules/available-modules.Rmd | 2 +- .../modules/python-module-path.snip | 4 - .../modules/software-dependencies.Rmd} | 44 ++++----- .../modules/wrong-gcc-version.snip | 5 -- episodes/files/snippets/rename-snippets.sh | 89 ------------------- episodes/lesson_config.yaml | 3 +- episodes/load_config.R | 10 +-- site/README.md | 2 +- 85 files changed, 236 insertions(+), 1002 deletions(-) delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/_config_options.yml delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/queue-info.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/specific-node-info.Rmd delete 
mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/login_output.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/available-modules.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/missing-python.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module-load-python.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module_list.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-executable-dir.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-command.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-output.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-module-path.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/software-dependencies.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/modules/wrong-gcc-version.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/job-detail.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/perf-exercise.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/pi-mpi-details.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/runtime-exercise.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/serial-submit.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/sharpen-details.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/resources/view-output.snip delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-script.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-status.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/del_job.Rmd delete mode 100644 
episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/filesystem_issues.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/job-with-name-status.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/long_job.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/ls-pgm-output.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/option-flags-list.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge2.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-launch-desc.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-script.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/print-sched-variables.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-job.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-output.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_pending.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_running.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-begin.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-cancel.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-multiple-jobs.Rmd delete mode 100644 episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/using-nodes-interactively.Rmd delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip rename episodes/files/snippets/Ghastly_Mistakes/modules/{missing-python.snip => missing-python.Rmd} (82%) create mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.Rmd delete mode 100644 
episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip rename episodes/files/snippets/Ghastly_Mistakes/modules/{python-ls-dir-output.snip => python-executable-dir.Rmd} (85%) delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip rename episodes/files/snippets/{HPCC_MagicCastle_slurm/modules/software-dependencies.snip => Ghastly_Mistakes/modules/software-dependencies.Rmd} (100%) delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks.Rmd delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks.Rmd delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip create mode 100644 episodes/files/snippets/Ghastly_Mistakes/parallel/one-task.Rmd rename episodes/files/snippets/Ghastly_Mistakes/resources/{account-history.snip => account-history.Rmd} (97%) delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip delete mode 100644 episodes/files/snippets/Ghastly_Mistakes/transferring-files/filezilla-ssh-tunnel-instructions.snip delete mode 100644 episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-module-path.snip rename episodes/files/snippets/{Ghastly_Mistakes/modules/software-dependencies.snip => HPCC_MagicCastle_slurm/modules/software-dependencies.Rmd} (80%) delete mode 100644 
episodes/files/snippets/HPCC_MagicCastle_slurm/modules/wrong-gcc-version.snip delete mode 100755 episodes/files/snippets/rename-snippets.sh diff --git a/episodes/10-hpc-intro.Rmd b/episodes/10-hpc-intro.Rmd index 6b7e88c9..6059c5a7 100644 --- a/episodes/10-hpc-intro.Rmd +++ b/episodes/10-hpc-intro.Rmd @@ -4,6 +4,11 @@ teaching: 15 exercises: 5 --- +```{r, echo=FALSE} +# Source the external configuration script +source("load_config.R") +``` + ::::::::::::::::::::::::::::::::::::::: objectives - Describe what an HPC system is diff --git a/episodes/11-connecting.Rmd b/episodes/11-connecting.Rmd index 103d0379..428ac5e6 100644 --- a/episodes/11-connecting.Rmd +++ b/episodes/11-connecting.Rmd @@ -4,6 +4,11 @@ teaching: 25 exercises: 10 --- +```{r, echo=FALSE} +# Source the external configuration script +source("load_config.R") +``` + ::::::::::::::::::::::::::::::::::::::: objectives - Configure secure access to a remote HPC system. @@ -17,11 +22,6 @@ exercises: 10 :::::::::::::::::::::::::::::::::::::::::::::::::: -```{r, echo=FALSE} -# Source the external configuration script -source("load_config.R") -``` - ## Secure Connections The first step in using a cluster is to establish a connection from our laptop diff --git a/episodes/15-modules.Rmd b/episodes/15-modules.Rmd index db1d387e..fc044cfa 100644 --- a/episodes/15-modules.Rmd +++ b/episodes/15-modules.Rmd @@ -82,6 +82,8 @@ To see available software modules, use `module avail`: ```{r, child=paste(snippets, '/modules/available-modules.Rmd', sep=''), eval=TRUE} ``` +Note that piping the output through `less` allows us to search within the output using the / key. + ### Listing Currently Loaded Modules You can use the `module list` command to see which modules you currently have @@ -175,13 +177,14 @@ software is loaded. 
Let's examine the output of `module avail` more closely, using the pager since there may be reams of output: -```bash -`r config$remote$prompt` module avail | less -``` - ```{r, child=paste(snippets, '/modules/available-modules.Rmd', sep=''), eval=TRUE} ``` +If the software your Slurm script runs requires a specific version +of a dependency, make sure you use the full name of the module, rather +than the _default_ loaded when you give only its name (up to the first +slash). + ::::::::::::::::::::::::::::::::::::::: challenge ## Using Software Modules in Scripts diff --git a/episodes/18-resources.Rmd b/episodes/18-resources.Rmd index d95d8b53..86901f40 100644 --- a/episodes/18-resources.Rmd +++ b/episodes/18-resources.Rmd @@ -31,8 +31,8 @@ might matter. ## Estimating Required Resources Using the Scheduler -Although we covered requesting resources from the scheduler earlier with the -π code, how do we know what type of resources the software will need in +Although we covered requesting resources from the scheduler earlier, +how do we know what type of resources the software will need in the first place, and its demand for each? In general, unless the software documentation or user testimonials provide some idea, we won't know how much memory or compute time a program will need. diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/_config_options.yml b/episodes/files/snippets/EPCC_ARCHER2_slurm/_config_options.yml deleted file mode 100644 index 694a44a9..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/_config_options.yml +++ /dev/null @@ -1,63 +0,0 @@ -#------------------------------------------------------------ -# ComputeCanada: Graham + Slurm -#------------------------------------------------------------ - -# Cluster host and scheduler options: the defaults come from -# Graham at Compute Canada, running Slurm. Other options can -# be found in the library of snippets, -# `_includes/snippets_library`.
To use one, replace options -# below with those in `_config_options.yml` from the -# library. E.g, to customise for Cirrus at EPCC, running -# PBS, we could replace the options below with those from -# -# _includes/snippets_library/EPCC_Cirrus_pbs/_config_options.yml -# -# If your cluster is not represented in the library, please -# copy an existing folder, rename it, and customize for your -# installation. Remember to keep the leading slash on the -# `snippets` variable below! - -snippets: "/snippets_library/EPCC_ARCHER2_slurm" - -local: - prompt: "[user@laptop ~]$" - bash_shebang: "#!/bin/bash" - -remote: - name: "ARCHER2" - login: "login.archer2.ac.uk" - host: "ln03" - node: "nid001053" - location: "EPCC, The University of Edinburgh" - homedir: "/home/ta076/ta076/" - user: "userid" - prompt: "userid@ln03:~>" - prompt-work: "userid@ln03:/work/ta076/ta076/userid>" - module_python3: "cray-python" - bash_shebang: "#!/bin/bash" - -sched: - name: "Slurm" - reservation: "shortqos" - budget: "short" - submit: - name: "sbatch" - options: "--partition=standard --qos=short" - queue: - debug: "" - testing: "" - status: "squeue" - flag: - user: "-u userid" - interactive: "" - histdetail: "-l -j" - name: "--job-name" - time: "--time" - queue: "--partition" - nodes: "--nodes" - tasks: "" - del: "scancel" - interactive: "srun" - info: "sinfo" - comment: "#SBATCH" - hist: "sacct" diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/queue-info.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/queue-info.Rmd deleted file mode 100644 index b9573160..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/queue-info.Rmd +++ /dev/null @@ -1,8 +0,0 @@ -```output -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -standard up 1-00:00:00 27 drain* nid[001029,001050,001149,001363,001366,001391,001552,001568,001620,001642,001669,001672-001675,001688,001690-001691,001747,001751,001783,001793,001812,001832-001835] -standard up 1-00:00:00 5 down* 
nid[001024,001026,001064,001239,001898] -standard up 1-00:00:00 8 drain nid[001002,001028,001030-001031,001360-001362,001745] -standard up 1-00:00:00 945 alloc nid[001000-001001,001003-001023,001025,001027,001032-001037,001040-001049,001051-001063,001065-001108,001110-001145,001147,001150-001238,001240-001264,001266-001271,001274-001334,001337-001359,001364-001365,001367-001390,001392-001551,001553-001567,001569-001619,001621-001637,001639-001641,001643-001668,001670-001671,001676,001679-001687,001692-001734,001736-001744,001746,001748-001750,001752-001782,001784-001792,001794-001811,001813-001824,001826-001831,001836-001890,001892-001897,001899-001918,001920,001923-001934,001936-001945,001947-001965,001967-001981,001984-001991,002006-002023] -standard up 1-00:00:00 37 resv nid[001038-001039,001109,001146,001148,001265,001272-001273,001335-001336,001638,001677-001678,001735,001891,001919,001921-001922,001935,001946,001966,001982-001983,001992-002005] -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/specific-node-info.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/specific-node-info.Rmd deleted file mode 100644 index 54a94855..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/specific-node-info.Rmd +++ /dev/null @@ -1,11 +0,0 @@ -::: challenge -## Explore a Worker Node - -Finally, let's look at the resources available on the worker nodes -where your jobs will actually run. 
Try running this command to see -the name, CPUs and memory available on one of the worker nodes: - -```bash -`r config$remote$prompt` sinfo -n nid001053 -o "%n %c %m" -``` -::: diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/login_output.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/login_output.Rmd deleted file mode 100644 index dbba457f..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/login_output.Rmd +++ /dev/null @@ -1,26 +0,0 @@ -```output -This node is running Cray's Linux Environment version 1.3.2 - -####################################################################################### - - @@@@@@@@@ - @@@ @@@ _ ____ ____ _ _ _____ ____ ____ - @@@ @@@@@ @@@ / \ | _ \ / ___| | | | | | ____| | _ \ |___ \ - @@@ @@ @@ @@@ / _ \ | |_) | | | | |_| | | _| | |_) | __) | - @@ @@ @@@ @@ @@ / ___ \ | _ < | |___ | _ | | |___ | _ < / __/ - @@ @@ @@@ @@ @@ /_/ \_\ |_| \_\ \____| |_| |_| |_____| |_| \_\ |_____| - @@@ @@ @@ @@@ - @@@ @@@@@ @@@ https://www.archer2.ac.uk/support-access/ - @@@ @@@ - @@@@@@@@@ - - - U K R I - E P C C - H P E C r a y - - -Hostname: uan01 -Distribution: SLES 15.1 1 -CPUS: 256 -Memory: 257.4GB -Configured: 2021-04-27 - -###################################################################################### -``` \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/available-modules.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/available-modules.Rmd deleted file mode 100644 index 132ecf1f..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/available-modules.Rmd +++ /dev/null @@ -1,26 +0,0 @@ -```output ------------ /work/y07/shared/archer2-modules/modulefiles-cse-pyvenvs ----------- -tensorflow/2.3.1-py38 torch/1.6.0-py38 - ------------ /work/y07/shared/archer2-modules/modulefiles-cse-pymods ------------ -python-netCDF4/1.5.5.1 - ------------- /work/y07/shared/archer2-modules/modulefiles-cse-utils ------------ -bolt/0.7 ncview/ncview-2.1.7-gcc-10.1.0 
vmd/1.9.3-mpi-gcc10 -cmake/3.18.4 reframe/3.2 xios/2.5-gcc10 -ed/1.16-gcc10 tcl/8.4.20-gcc10 xthi/1.0 -epcc-job-env tcl/8.5.0-gcc10 xthi/1.0-gcc10 -epcc-reframe/0.1 tcl/8.6.0-gcc10 -genmaskcpu/1.0 tcl/8.6.10-gcc10(default) -gnuplot/5.4.1-gcc-10.1.0 tk/8.5.6-gcc10 -lzip/1.20-gcc10 tk/8.6.10-gcc10(default) -nco/4.9.6 visidata/2.1 -nco/4.9.6-gcc-10.1.0 vmd/1.9.3-gcc10(default) - ------------- /work/y07/shared/archer2-modules/modulefiles-cse-libs ------------- -adios/1.13.1 hypre/2.18.0 mumps/5.2.1 superlu-dist/6.1.1 -boost/1.72.0 libxml2/2.9.7-gcc-9.3.0 parmetis/4.0.3 superlu/5.2.1 -glm/0.9.9.6 matio/1.5.18 petsc/3.13.3 trilinos/12.18.1 -gmp/6.1.2-gcc10 metis/5.1.0 scotch/6.0.10 -... -``` \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/missing-python.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/missing-python.snip deleted file mode 100644 index 381142dc..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/missing-python.snip +++ /dev/null @@ -1 +0,0 @@ -/usr/bin/python3 diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module-load-python.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module-load-python.snip deleted file mode 100644 index d0e214d5..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module-load-python.snip +++ /dev/null @@ -1,2 +0,0 @@ -{{ site.host_prompt-work }} module load cray-python -{{ site.host_prompt-work }} which python3 diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module_list.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module_list.snip deleted file mode 100644 index c180fda5..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module_list.snip +++ /dev/null @@ -1,16 +0,0 @@ -Currently Loaded Modulefiles: - 1) cpe-cray - 2) cce/10.0.4(default) - 3) craype/2.7.2(default) - 4) craype-x86-rome - 5) libfabric/1.11.0.0.233(default) - 6) craype-network-ofi - 7) cray-dsmml/0.1.2(default) - 8) 
perftools-base/20.10.0(default) - 9) xpmem/2.2.35-7.0.1.0_1.9__gd50fabf.shasta(default) -10) cray-mpich/8.0.16(default) -11) cray-libsci/20.10.1.2(default) -12) bolt/0.7 -13) /work/y07/shared/archer2-modules/modulefiles-cse/epcc-setup-env -14) /usr/local/share/epcc-module/epcc-module-loader -15) /work/y07/shared/archer2-modules/modulefiles-cse/epcc-setup-env-profile diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-executable-dir.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-executable-dir.snip deleted file mode 100644 index b9f4b294..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-executable-dir.snip +++ /dev/null @@ -1 +0,0 @@ -/opt/cray/pe/python/3.8.5.0/bin/python3 diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-command.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-command.snip deleted file mode 100644 index f87667f8..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-command.snip +++ /dev/null @@ -1 +0,0 @@ -{{ site.host_prompt-work }} ls /opt/cray/pe/python/3.8.5.0/bin diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-output.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-output.snip deleted file mode 100644 index b8411602..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-output.snip +++ /dev/null @@ -1,2 +0,0 @@ -2to3 cygdb cythonize easy_install-3.8 f2py3 idle3 nosetests pip pip3.8 pydoc3.8 py.test python3 python3.8-config -2to3-3.8 cython easy_install f2py f2py3.8 idle3.8 nosetests-3.8 pip3 pydoc3 pytest python python3.8 python3-config diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-module-path.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-module-path.snip deleted file mode 100644 index b8480b0e..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-module-path.snip 
+++ /dev/null @@ -1 +0,0 @@ -/opt/cray/pe/python/3.8.5.0/bin:/lus/cls01095/work/z19/z19/aturner/.local/bin:/lus/cls01095/work/y07/shared/bolt/0.7/bin:/work/y07/shared/utils/bin:/usr/local/maven/bin:/opt/cray/pe/perftools/20.10.0/bin:/opt/cray/pe/papi/6.0.0.4/bin:/opt/cray/libfabric/1.11.0.0.233/bin:/opt/cray/pe/craype/2.7.2/bin:/opt/cray/pe/cce/10.0.4/cce-clang/x86_64/bin:/opt/cray/pe/cce/10.0.4/binutils/x86_64/x86_64-pc-linux-gnu/bin:/opt/cray/pe/cce/10.0.4/binutils/cross/x86_64-aarch64/aarch64-linux-gnu/../bin:/opt/cray/pe/cce/10.0.4/utils/x86_64/bin:/usr/local/Modules/bin:/home/z19/z19/aturner/bin:/usr/local/bin:/usr/bin:/bin:/opt/cray/pe/bin:/usr/lib/mit/bin diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/software-dependencies.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/software-dependencies.snip deleted file mode 100644 index f555f415..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/software-dependencies.snip +++ /dev/null @@ -1,80 +0,0 @@ -To demonstrate, let's use `module list`. `module list` shows all loaded software modules. 
- -``` -{{ site.host_prompt }} module list -``` -{: .language-bash} -``` -Currently Loaded Modules: - 1) nixpkgs/.16.09 (H,S) 3) gcccore/.5.4.0 (H) 5) intel/2016.4 (t) 7) StdEnv/2016.4 (S) - 2) icc/.2016.4.258 (H) 4) ifort/.2016.4.258 (H) 6) openmpi/2.1.1 (m) 8) python/3.5.2 (t) - - Where: - S: Module is Sticky, requires --force to unload or purge - m: MPI implementations / Implémentations MPI - t: Tools for development / Outils de développement - H: Hidden Module -``` -{: .output} - -``` -{{ site.host_prompt }} module load beast -{{ site.host_prompt }} module list -``` -{: .language-bash} -``` -Currently Loaded Modules: - 1) nixpkgs/.16.09 (H,S) 5) intel/2016.4 (t) 9) java/1.8.0_121 (t) - 2) icc/.2016.4.258 (H) 6) openmpi/2.1.1 (m) 10) beagle-lib/2.1.2 (bio) - 3) gcccore/.5.4.0 (H) 7) StdEnv/2016.4 (S) 11) beast/2.4.0 (chem) - 4) ifort/.2016.4.258 (H) 8) python/3.5.2 (t) - - Where: - S: Module is Sticky, requires --force to unload or purge - bio: Bioinformatic libraries/apps / Logiciels de bioinformatique - m: MPI implementations / Implémentations MPI - t: Tools for development / Outils de développement - chem: Chemistry libraries/apps / Logiciels de chimie - H: Hidden Module -``` -{: .output} - -So in this case, loading the `beast` module (a bioinformatics software package), also loaded -`java/1.8.0_121` and `beagle-lib/2.1.2` as well. Let's try unloading the `beast` package. 
- -``` -{{ site.host_prompt }} module unload beast -{{ site.host_prompt }} module list -``` -{: .language-bash} -``` -Currently Loaded Modules: - 1) nixpkgs/.16.09 (H,S) 3) gcccore/.5.4.0 (H) 5) intel/2016.4 (t) 7) StdEnv/2016.4 (S) - 2) icc/.2016.4.258 (H) 4) ifort/.2016.4.258 (H) 6) openmpi/2.1.1 (m) 8) python/3.5.2 (t) - - Where: - S: Module is Sticky, requires --force to unload or purge - m: MPI implementations / Implémentations MPI - t: Tools for development / Outils de développement - H: Hidden Module -``` -{: .output} - -So using `module unload` "un-loads" a module along with its dependencies. -If we wanted to unload everything at once, we could run `module purge` (unloads everything). - -``` -{{ site.host_prompt }} module purge -``` -{: .language-bash} -``` -The following modules were not unloaded: - (Use "module --force purge" to unload all): - - 1) StdEnv/2016.4 3) icc/.2016.4.258 5) ifort/.2016.4.258 7) imkl/11.3.4.258 - 2) nixpkgs/.16.09 4) gcccore/.5.4.0 6) intel/2016.4 8) openmpi/2.1.1 -``` -{: .output} - -Note that `module purge` is informative. It lets us know that all but a default set of packages -have been unloaded (and how to actually unload these if we truly so desired). diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/wrong-gcc-version.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/wrong-gcc-version.snip deleted file mode 100644 index 9e0f7666..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/wrong-gcc-version.snip +++ /dev/null @@ -1,73 +0,0 @@ -Let's take a closer look at the `gcc` module. GCC is an extremely widely used C/C++/Fortran -compiler. Tons of software is dependent on the GCC version, and might not compile or run if the -wrong version is loaded. In this case, there are two different versions: `gcc/4.8.5` and -`gcc/5.4.0`. How do we load each copy and which copy is the default? - -In this case, `gcc/5.4.0` has a `(D)` next to it. 
This indicates that it is the default - if we type -`module load gcc`, this is the copy that will be loaded. - -``` -{{ site.host_prompt }} module load gcc -{{ site.host_prompt }} gcc --version -``` -{: .language-bash} -``` -Lmod is automatically replacing "intel/2016.4" with "gcc/5.4.0". - - -Due to MODULEPATH changes, the following have been reloaded: - 1) openmpi/2.1.1 - -gcc (GCC) 5.4.0 -Copyright (C) 2015 Free Software Foundation, Inc. -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -``` -{: .output} - -Note that three things happened: the default copy of GCC was loaded (version 5.4.0), the Intel -compilers (which conflict with GCC) were unloaded, and software that is dependent on compiler -(OpenMPI) was reloaded. The `module` system turned what might be a super-complex operation into a -single command. - -So how do we load the non-default copy of a software package? In this case, the only change we need -to make is be more specific about the module we are loading. There are two GCC modules: `gcc/5.4.0` -and `gcc/4.8.5`. To load a non-default module, the only change we need to make to our `module load` -command is to leave in the version number after the `/`. - -``` -{{ site.host_prompt }} module load gcc/4.8.5 -{{ site.host_prompt }} gcc --version -``` -{: .language-bash} -``` -Inactive Modules: - 1) openmpi - -The following have been reloaded with a version change: - 1) gcc/5.4.0 => gcc/4.8.5 - -gcc (GCC) 4.8.5 -Copyright (C) 2015 Free Software Foundation, Inc. -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -``` -{: .output} - -We now have successfully switched from GCC 5.4.0 to GCC 4.8.5. It is also important to note that -there was no compatible OpenMPI module available for GCC 4.8.5. 
Because of this, the `module` -program has "inactivated" the module. All this means for us is that if we re-load GCC 5.4.0, -`module` will remember OpenMPI used to be loaded and load that module as well. - -``` -{{ site.host_prompt }} module load gcc/5.4.0 -``` -{: .language-bash} -``` -Activating Modules: - 1) openmpi/2.1.1 - -The following have been reloaded with a version change: - 1) gcc/4.8.5 => gcc/5.4.0 -``` -{: .output} diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/job-detail.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/job-detail.Rmd deleted file mode 100644 index 880b8421..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/job-detail.Rmd +++ /dev/null @@ -1,2 +0,0 @@ -JOBID USER ACCOUNT NAME ST REASON START_TIME T... -36856 yourUsername yourAccount example-job.sh R None 2017-07-01T16:47:02 ... diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/perf-exercise.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/perf-exercise.Rmd deleted file mode 100644 index f79772f7..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/perf-exercise.Rmd +++ /dev/null @@ -1,67 +0,0 @@ -::: challenge -## Computing the speedup and parallel efficiency -Use your *Overall run times* from above to fill in a table like the one below. - -| Cores | Overall run time (s) | Ideal speedup | Actual speedup | Parallel efficiency | -|------------|----------------------|---------------|----------------|---------------------| -| 1 (serial) | | | | | -| 2 | | | | | -| 4 | | | | | -| 8 | | | | | -| 16 | | | | | -| 32 | | | | | -| 64 | | | | | -| 128 | | | | | -| 256 | | | | | - -Given your results, try to answer the following questions: - -1. What is the core count where you get the **most** efficient use of resources, irrespective - of run time? -2. What is the core count where you get the fastest solution, irrespective of efficiency? -3. 
What do you think a good core count choice would be for this application that balances - time to solution and efficiency? Why did you choose this option? - -::: solution - -The table below gives example results for `r config$remote$name` based on the example -runtimes given in the solution above. - -| Cores | Overall run time (s) | Ideal speedup | Actual speedup | Parallel efficiency | -|-----------:|---------------------:|--------------:|---------------:|--------------------:| -| 1 | 3.931 | 1.000 | 1.000 | 1.000 | -| 2 | 2.002 | 2.000 | 1.963 | 0.982 | -| 4 | 1.048 | 4.000 | 3.751 | 0.938 | -| 8 | 0.572 | 8.000 | 6.872 | 0.859 | -| 16 | 0.613 | 16.000 | 6.408 | 0.401 | -| 32 | 0.360 | 32.000 | 10.928 | 0.342 | -| 64 | 0.249 | 64.000 | 15.767 | 0.246 | -| 128 | 0.170 | 128.000 | 23.122 | 0.181 | -| 256 | 0.187 | 256.000 | 21.077 | 0.082 | - -### What is the core count where you get the **most** efficient use of resources? -Just using a single core is the cheapest (and always will be unless your speedup is better -than perfect – “super-linear” speedup). However, it may not be possible to run on small -numbers of cores depending on how much memory you need or other technical constraints. -**Note:** on most high-end systems, nodes are not shared between users. This means you are -charged for all the CPU-cores on a node regardless of whether you actually use them. Typically -we would be running on many hundreds of CPU-cores not a few tens, so the real question in -practice is: what is the optimal number of nodes to use? -### What is the core count where you get the fastest solution, irrespective of efficiency? -256 cores gives the fastest time to solution. -The fastest time to solution does not often make the most efficient use of resources so -to use this option, you may end up wasting your resources. Sometimes, when there is -time pressure to run the calculations, this may be a valid approach to running -applications. 
-### What do you think a good core count choice would be for this application to use? - -8 cores is probably a good number of cores to use with a parallel efficiency of 86%. -Usually, the best choice is one that delivers good parallel efficiency with an acceptable -time to solution. Note that *acceptable time to solution* differs depending on circumstances -so this is something that the individual researcher will have to assess. Good parallel -efficiency is often considered to be 70% or greater though many researchers will be happy -to run in a regime with parallel efficiency greater than 60%. As noted above, running with -worse parallel efficiency may also be useful if the time to solution is an overriding factor. - -::: -::: \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/pi-mpi-details.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/pi-mpi-details.Rmd deleted file mode 100644 index 086f90e8..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/pi-mpi-details.Rmd +++ /dev/null @@ -1,16 +0,0 @@ -::: prereq - -## Required Files - -The program used in this example can be retrieved using wget or a browser and copied to the remote. 
- -**Using wget**: -```bash -`r config$remote$prompt` wget `r config$url``r config$baseurl`/files/pi-mpi.py -``` - -**Using a web browser**: - -[`r config$url``r config$baseurl`/files/pi-mpi.py](`r config$url``r config$baseurl`/files/pi-mpi.py) - -::: \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/runtime-exercise.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/runtime-exercise.Rmd deleted file mode 100644 index 4977a00e..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/runtime-exercise.Rmd +++ /dev/null @@ -1,47 +0,0 @@ -::: challenge -## Benchmarking the parallel performance -Modify your job script to run on multiple cores and evaluate the performance of `pi-mpi.py` -on a variety of different core counts and use multiple runs to complete a table like the one -below. -If you examine the log file you will see that it contains two timings: the total time taken by the -entire program and the time taken solely by the calculation. The calculation of Pi from the Monte-Carlo counts -is not parallelised so this is a serial overhead, performed by a single processor. -The calculation part is, in theory, perfectly parallel (each processor operates on independent sets of unique random numbers -) so this should get faster on more cores. The Calculation core seconds is the -*calculation time* multiplied by the number of cores. - -| Cores | Overall run time (s) | Calculation time (s) | Calculation core seconds | -|------------|----------------------|----------------------|--------------------------| -| 1 (serial) | | | | -| 2 | | | | -| 4 | | | | -| 8 | | | | -| 16 | | | | -| 32 | | | | -| 64 | | | | -| 128 | | | | -| 256 | | | | - -Look at your results – do they make sense? Given the structure of the code, you would -expect the performance of the calculation to increase -linearly with the number of cores: this would give a roughly constant figure for the Calculation core -seconds. 
Is this what you observe? - -::: solution - -The table below shows example timings for runs on `r config$remote$name` - -| Cores | Overall run time (s) | Calculation time (s) | Calculation core seconds | -|-----------:|---------------------:|---------------------:|-------------------------------:| -| 1 | 3.931 | 3.854 | 3.854 | -| 2 | 2.002 | 1.930 | 3.859 | -| 4 | 1.048 | 0.972 | 3.888 | -| 8 | 0.572 | 0.495 | 3.958 | -| 16 | 0.613 | 0.536 | 8.574 | -| 32 | 0.360 | 0.278 | 8.880 | -| 64 | 0.249 | 0.163 | 10.400 | -| 128 | 0.170 | 0.083 | 10.624 | -| 256 | 0.187 | 0.135 | 34.560 | - -::: -::: \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/serial-submit.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/serial-submit.Rmd deleted file mode 100644 index b7f68819..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/serial-submit.Rmd +++ /dev/null @@ -1,25 +0,0 @@ -Creating a file called `submit-pi-mpi.slurm`: - -```bash -#!/bin/bash -#SBATCH --partition=`r config$sched$partition` -#SBATCH --qos=`r config$sched$qos` -#SBATCH --reservation=`r config$sched$reservation` - -#SBATCH --job-name=pi-mpi -#SBATCH --nodes=1 -#SBATCH --tasks-per-node=1 -#SBATCH --time=00:15:00 -srun python pi-mpi.py 10000000 -``` - -Run application using a single process (i.e. 
in serial) with a blocking `srun` command: -```bash -srun python pi-mpi.py 10000000 -``` - -Submit with to the batch queue with: - -```bash -`r config$sched$prompt_work` `r config$sched$submit.name` submit-pi-mpi.slurm -``` \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/sharpen-details.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/sharpen-details.snip deleted file mode 100644 index 08172fbe..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/sharpen-details.snip +++ /dev/null @@ -1,18 +0,0 @@ -The `sharpen` program has been precompiled on {{ site.remote.name }}, you can add it to your `PATH` variable with the commands: - -``` -export PATH=/work/{{ site.sched.project }}/{{ site.sched.project }}/shared/bin:$PATH -export FUZZY_INPUT=/work/{{ site.sched.project }}/{{ site.sched.project }}/shared/fuzzy.pgm -`````` -{: .language-bash} - -Once you have set the required environment variables, you can access the program as `sharpen-mpi`. -You will also need to get a copy of the input file for this application. -To do this, copy it from the -central install location to your directory with (note you must have loaded the -sharpen module as described above for this to work): - -``` -{{ site.host_prompt }} cp $FUZZY_INPUT . -``` -{: .language-bash} diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/view-output.snip b/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/view-output.snip deleted file mode 100644 index 5b2a572b..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/resources/view-output.snip +++ /dev/null @@ -1,12 +0,0 @@ -> ## Viewing the sharpened output image -> To see the effect of the sharpening algorithm, you can view the images using the display -> program from the ImageMagick suite. -> ``` -> display sharpened.pgm -> ``` -> Type `q` in the image window to close the program. 
To view the image you will need an X -> window client installed and you will have to have logged into {{ site.host_name }} with the `ssh -Y` -> option to export the display back to your local system. If you are using Windows, the -> MobaXterm program provides a login shell with X capability. If you are using macOS, then -> you will need to install XQuartz. If you are using Linux then X should just work! -{: .callout} \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-script.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-script.Rmd deleted file mode 100644 index 0c2eb8fa..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-script.Rmd +++ /dev/null @@ -1,3 +0,0 @@ -```output -Submitted batch job 36855 -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-status.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-status.Rmd deleted file mode 100644 index e98253bd..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-status.Rmd +++ /dev/null @@ -1,8 +0,0 @@ -```output -JOBID USER ACCOUNT NAME ST REASON START_TIME T... -36856 yourUsername yourAccount example-job.sh R None 2017-07-01T16:47:02 ... -``` - -We can see all the details of our job, most importantly that it is in the `R` -or `RUNNING` state. Sometimes our jobs might need to wait in a queue -(`PENDING`) or have an error (`E`). 
diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/del_job.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/del_job.Rmd deleted file mode 100644 index 6906426a..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/del_job.Rmd +++ /dev/null @@ -1,3 +0,0 @@ -```bash -`r config$remote$prompt` scancel 38759 -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/filesystem_issues.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/filesystem_issues.Rmd deleted file mode 100644 index 708dfe2e..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/filesystem_issues.Rmd +++ /dev/null @@ -1,23 +0,0 @@ -::: callout - -## Filesystem on ARCHER2 - -At this point it is important to remember that ARCHER2 has two *separate* -filesystems: `/home` and `/work`. - -* `/home` is meant for small files such as source code, and is the - filesystem that you are on when you log in -* `/work` is a much larger and faster filesystem, meant for production - runs and storing large datasets - -The `/home` filesystem **is not mounted on the compute nodes** -meaning that programs run in the batch queues cannot read from or -write to files in your home directory. This has not been a problem -so far as none of our programs have done file input or -output. However, the parallel program we will run here reads and -writes large images. - -* When you log in, you will be in your home directory, ``r config$remote$homedir``. -* Before you run real programs on ARCHER2, you **must change directory** to - `/work`. 
-::: diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/job-with-name-status.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/job-with-name-status.Rmd deleted file mode 100644 index c7657ef5..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/job-with-name-status.Rmd +++ /dev/null @@ -1,4 +0,0 @@ -```output -JOBID USER ACCOUNT NAME ST REASON START_TIME TIME TIME_LEFT NODES CPUS -38191 yourUsername yourAccount new_name PD Priority N/A 0:00 1:00:00 1 1 -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/long_job.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/long_job.Rmd deleted file mode 100644 index ddd0d12f..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/long_job.Rmd +++ /dev/null @@ -1,11 +0,0 @@ -```bash -#!/bin/bash - -#SBATCH --partition=standard -#SBATCH --qos=short -#SBATCH --time=00:00:30 - -echo 'This script is running on:' -hostname -sleep 120 -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/ls-pgm-output.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/ls-pgm-output.Rmd deleted file mode 100644 index 6e0886a7..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/ls-pgm-output.Rmd +++ /dev/null @@ -1,4 +0,0 @@ -```output --rw-r--r-- 1 userid ta028 1762743 Jun 26 17:29 fuzzy.pgm --rw------- 1 userid ta028 1678630 Jun 26 17:33 sharpened.pgm -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/option-flags-list.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/option-flags-list.Rmd deleted file mode 100644 index 626c6eab..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/option-flags-list.Rmd +++ /dev/null @@ -1,5 +0,0 @@ -* `--nodes=` - Number of nodes to use -* `--ntasks-per-node=` - Number of parallel processes per node -* `--cpus-per-task=` - Number of cores to assign to each parallel process -* `--time=` - Maximum real-world time (walltime) -your job will be allowed to 
run. The `` part can be omitted. diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge.Rmd deleted file mode 100644 index 205f15ff..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge.Rmd +++ /dev/null @@ -1,27 +0,0 @@ -::: challenge - -## Running parallel jobs - -Modify the pi-mpi-run script that you used above to use all 128 cores on -one node. Check the output to confirm that it used the correct number -of cores in parallel for the calculation. - -:::: solution -Here is a modified script - -```bash -#!/bin/bash - -#SBATCH --partition=`r config$sched$partition` -#SBATCH --qos=`r config$sched$qos` -#SBATCH --reservation=`r config$sched$reservation` -#SBATCH --time=00:00:30 - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=128 - -module load cray-python -srun python pi-mpi.py 10000000 -``` -:::: -::: diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge2.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge2.Rmd deleted file mode 100644 index 2ec4d28f..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge2.Rmd +++ /dev/null @@ -1,29 +0,0 @@ -::: challenge - -## Configuring parallel jobs - -You will see in the job output that information is displayed about -where each MPI process is running, in particular which node it is -on. - -Modify the pi-mpi-run script that you run a total of 2 nodes and 16 processes; -but to use only 8 tasks on each of two nodes. -Check the output file to ensure that you understand the job -distribution. 
- -:::: solution -```bash -#!/bin/bash - -#SBATCH --partition=`r config$sched$partition` -#SBATCH --qos=`r config$sched$qos` -#SBATCH --time=00:00:30 - -#SBATCH --nodes=2 -#SBATCH --ntasks-per-node=8 - -module load cray-python -srun python pi-mpi.py 10000000 -``` -:::: -::: diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-launch-desc.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-launch-desc.Rmd deleted file mode 100644 index 98cbb455..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-launch-desc.Rmd +++ /dev/null @@ -1,15 +0,0 @@ -```bash -srun python pi-mpi.py 10000000 -``` - -And this corresponds to the four required items we described above: - -1. Parallel launch program: in this case the parallel launch program - is called `srun`; the additional argument controls which cores are - used. -2. Number of parallel processes per node: in this case this is 16, - and is specified by the option `--ntasks-per-node=16` option. -3. Total number of parallel processes: in this case this is also 16, - because we specified 1 node and 16 parallel processes per node. -4. Our program and arguments: in this case this is - `python pi-mpi.py 10000000`. 
diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-script.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-script.Rmd deleted file mode 100644 index cb09f254..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-script.Rmd +++ /dev/null @@ -1,15 +0,0 @@ -```bash -#!/bin/bash - -#SBATCH --partition=`r config$sched$partition` -#SBATCH --qos=`r config$sched$qos` -#SBATCH --reservation=`r config$sched$reservation` -#SBATCH --time=00:05:00 - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=16 - -module load cray-python - -srun python pi-mpi.py 10000000 -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/print-sched-variables.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/print-sched-variables.Rmd deleted file mode 100644 index 0d1e07e3..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/print-sched-variables.Rmd +++ /dev/null @@ -1,12 +0,0 @@ -::: challenge - -## Job environment variables - -When Slurm runs a job, it sets a number of environment variables for -the job. One of these will let us check our work from the last -problem. The `SLURM_CPUS_PER_TASK` variable is set to the number of -CPUs we requested with `-c`. Using the `SLURM_CPUS_PER_TASK` -variable, modify your job so that it prints how many CPUs have been -allocated. 
- -::: diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-job.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-job.Rmd deleted file mode 100644 index 43bd23a5..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-job.Rmd +++ /dev/null @@ -1,3 +0,0 @@ -```bash -`r config$remote$prompt_work` cat slurm-38193.out -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-output.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-output.Rmd deleted file mode 100644 index 9f277a14..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-output.Rmd +++ /dev/null @@ -1,5 +0,0 @@ -```output -This job is running on: -nid001147 -slurmstepd: error: *** JOB 38193 ON cn01 CANCELLED AT 2017-07-02T16:35:48 DUE TO TIME LIMIT *** -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_pending.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_pending.Rmd deleted file mode 100644 index f040e903..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_pending.Rmd +++ /dev/null @@ -1,4 +0,0 @@ -```output - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) -119867 standard myjob userid PD 0:00 1 (Resources) -``` diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_running.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_running.Rmd deleted file mode 100644 index 3233cdb7..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_running.Rmd +++ /dev/null @@ -1,2 +0,0 @@ - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 119867 standard example- userid R 0:00:06 1 nid001609 diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-begin.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-begin.Rmd deleted file mode 100644 index 214a8756..00000000 --- 
a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-begin.Rmd +++ /dev/null @@ -1,6 +0,0 @@ -```output -Submitted batch job 38759 - -JOBID USER ACCOUNT NAME ST REASON START_TIME TIME TIME_LEFT NODES CPUS -38759 yourUsername yourAccount example-job.sh PD Priority N/A 0:00 1:00 1 1 -``` \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-cancel.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-cancel.Rmd deleted file mode 100644 index 695180ff..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-cancel.Rmd +++ /dev/null @@ -1,3 +0,0 @@ -```output -JOBID USER ACCOUNT NAME ST REASON START_TIME TIME TIME_LEFT NODES CPUS -``` \ No newline at end of file diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-multiple-jobs.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-multiple-jobs.Rmd deleted file mode 100644 index ec27cd39..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-multiple-jobs.Rmd +++ /dev/null @@ -1,9 +0,0 @@ -::: challenge - -## Cancelling multiple jobs - -We can also cancel all of our jobs at once using the `-u` option. This will -delete all jobs for a specific user (in this case us). Note that you can only -delete your own jobs. Try submitting multiple jobs and then cancelling them -all with `scancel -u yourUsername`. -::: diff --git a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/using-nodes-interactively.Rmd b/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/using-nodes-interactively.Rmd deleted file mode 100644 index 2d93dd78..00000000 --- a/episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/using-nodes-interactively.Rmd +++ /dev/null @@ -1,33 +0,0 @@ -`srun` runs a single command in the queue system and then exits. -Let's demonstrate this by running the -`hostname` command with `srun`. (We can cancel an `srun` job with `Ctrl-c`.) 
- -```bash -`r config$host_prompt_work` srun `r config$sched$submit$options` --time=00:01:00 hostname -``` - -```output -nid001976 -``` - -`srun` accepts all of the same options as `sbatch`. However, instead of specifying these in a -script, these options are specified on the command-line when starting a job. - -Typically, the resulting shell environment will be the same as that for -`sbatch`. - -### Interactive jobs - -Sometimes, you will need a lot of resource for interactive use. Perhaps it's our first time running -an analysis or we are attempting to debug something that went wrong with a previous job. -Fortunately, SLURM makes it easy to start an interactive job with `srun`: - -```bash -`r config$host_prompt_work` srun `r config$sched$submit$options` --pty /bin/bash -``` - -You should be presented with a bash prompt. Note that the prompt may change -to reflect your new location, in this case the compute node we are logged on. -You can also verify this with `hostname`. - -When you are done with the interactive job, type `exit` to quit your session. diff --git a/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml b/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml index 54b76da6..a6d78a72 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml +++ b/episodes/files/snippets/Ghastly_Mistakes/_config_options.yml @@ -62,4 +62,5 @@ episode_order: - 17-parallel - 18-resources - 19-responsibility - \ No newline at end of file + +... 
diff --git a/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd b/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd index 29f69030..2487fc02 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/cluster/queue-info.Rmd @@ -1,5 +1,6 @@ ```output PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -doom up infinite 2 idle doo[1-2] -gloom up infinite 2 idle glo[1-2] +cpubase_bycore_b1* up infinite 4 idle node[1-2],smnode[1-2] +node up infinite 2 idle node[1-2] +smnode up infinite 2 idle smnode[1-2] ``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd b/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd index c38b377b..b9f6c8dc 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/cluster/specific-node-info.Rmd @@ -7,6 +7,6 @@ where your jobs will actually run. Try running this command to see the name, CPUs and memory available on one of the worker nodes: ```bash -`r config$remote$prompt` `r config$sched$info` -o "%n %c %m" | column -t +`r config$remote$prompt` sinfo -o "%n %c %m" | column -t ``` ::: diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd b/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd index e7fc4eb2..8402eaa0 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/available-modules.Rmd @@ -1,11 +1,22 @@ +```bash +`r config$remote$prompt` module avail | less +``` + ```output -~~~ /usr/local/modules ~~~ - amdahl/0.0.1 (D) python/2.7.1 (O) +~~~ /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/modules/all ~~~ + Bazel/3.6.0-GCCcore-x.y.z NSS/3.51-GCCcore-x.y.z + Bison/3.5.3-GCCcore-x.y.z Ninja/1.10.0-GCCcore-x.y.z + Boost/1.72.0-gompi-2020a OSU-Micro-Benchmarks/5.6.3-gompi-2020a + 
CGAL/4.14.3-gompi-2020a-Python-3.x.y OpenBLAS/0.3.9-GCC-x.y.z + CMake/3.16.4-GCCcore-x.y.z OpenFOAM/v2006-foss-2020a + +[removed most of the output here for clarity] Where: L: Module is loaded D: Default Module - O: Module is obsolete + Aliases exist: foo/1.2.3 (1.2) means that + "module load foo/1.2" will load foo/1.2.3 Use "module spider" to find all possible modules and extensions. Use "module keyword key1 key2 ..." to search for all possible modules matching diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip deleted file mode 100644 index a448dd96..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/default-modules.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -No Modulefiles Currently Loaded. -``` -{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.Rmd similarity index 82% rename from episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip rename to episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.Rmd index 89039d32..f06ed919 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.snip +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/missing-python.Rmd @@ -1,15 +1,14 @@ If the `python3` command was unavailable, we would see output like +```output +/usr/bin/which: no python3 in (/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/`r config$remote$user`/.local/bin:/home/`r config$remote$user`/bin) ``` -/usr/bin/which: no python3 in (/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/{{site.remote.user}}/.local/bin:/home/{{site.remote.user}}/bin) -``` -{: .output} Note that 
this wall of text is really a list, with values separated by the `:` character. The output is telling us that the `which` command searched the following directories for `python3`, without success: -``` +```output /cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin /opt/software/slurm/bin /usr/local/bin @@ -17,17 +16,15 @@ searched the following directories for `python3`, without success: /usr/local/sbin /usr/sbin /opt/puppetlabs/bin -/home/{{site.remote.user}}/.local/bin -/home/{{site.remote.user}}/bin +/home/`r config$remote$user`/.local/bin +/home/`r config$remote$user`/bin ``` -{: .output} However, in our case we do have an existing `python3` available so we see -``` +```output /cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin/python3 ``` -{: .output} We need a different Python than the system provided one though, so let us load a module to access it. diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.Rmd b/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.Rmd new file mode 100644 index 00000000..81d3be34 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.Rmd @@ -0,0 +1,8 @@ +```bash +`r config$remote$prompt` module load `r config$remote$modules$python` +`r config$remote$prompt` which python3 +``` + +```output +/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin/python3 +``` \ No newline at end of file diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip deleted file mode 100644 index d9bab7b4..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/module-load-python.snip +++ /dev/null @@ -1,5 +0,0 @@ -``` -{{ site.remote.prompt }} module load {{ site.remote.module_python3 }} -{{ site.remote.prompt }} which python3 -``` -{: .language-bash} diff --git 
a/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.Rmd similarity index 85% rename from episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip rename to episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.Rmd index 01d010ba..13031bf9 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-output.snip +++ b/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.Rmd @@ -1,4 +1,8 @@ +```bash +`r config$remote$prompt` ls /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin ``` + +```output 2to3 nosetests-3.8 python rst2s5.py 2to3-3.8 pasteurize python3 rst2xetex.py chardetect pbr python3.8 rst2xml.py @@ -13,4 +17,3 @@ idle3.8 pygmentize rst2odt_prepstyles.py virtualenv netaddr pytest rst2odt.py wheel nosetests py.test rst2pseudoxml.py ``` -{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip deleted file mode 100644 index 46dec092..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/python-executable-dir.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin/python3 -``` -{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip deleted file mode 100644 index 80319d0a..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/python-ls-dir-command.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -{{ site.remote.prompt }} ls /cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin -``` -{: .language-bash} diff --git 
a/episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip deleted file mode 100644 index 68e97df1..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/python-module-path.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/SQLite/3.31.1-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Tcl/8.6.10-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/GCCcore/x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/user01/.local/bin:/home/user01/bin -``` -{: .output} diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.Rmd similarity index 100% rename from episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.snip rename to episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.Rmd diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip b/episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip deleted file mode 100644 index 8fbd2825..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/wrong-gcc-version.snip +++ /dev/null @@ -1,5 +0,0 @@ - diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip b/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip deleted file mode 100644 index 2f643071..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks-jobscript.snip +++ /dev/null @@ -1,16 +0,0 @@ -``` -{{ site.remote.bash_shebang }} -{{ 
site.sched.comment }} {{ site.sched.flag.name }} parallel-job -{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} -{{ site.sched.comment }} -N 1 -{{ site.sched.comment }} -n 8 - -# Load the computing environment we need -# (mpi4py and numpy are in SciPy-bundle) -module load {{ site.remote.module_python3 }} -module load SciPy-bundle - -# Execute the task -mpiexec amdahl -``` -{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks.Rmd b/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks.Rmd new file mode 100644 index 00000000..dcbe379a --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/parallel/eight-tasks.Rmd @@ -0,0 +1,59 @@ +```bash +`r config$remote$prompt` nano parallel-job.sh +`r config$remote$prompt` cat parallel-job.sh +``` + +```bash +`r config$remote$shebang` +`r config$sched$comment` `r config$sched$flag$name` parallel-job +`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` +`r config$sched$comment` -N 1 +`r config$sched$comment` -n 8 + +# Load the computing environment we need +# (mpi4py and numpy are in SciPy-bundle) +module load `r config$remote$modules$python` +module load SciPy-bundle + +# Execute the task +mpiexec amdahl +``` + +Then submit your job. Note that the submission command has not really changed +from how we submitted the serial job: all the parallel settings are in the +batch file rather than the command line. + +```bash +`r config$remote$prompt` `r config$sched$submit$name` parallel-job.sh +``` + +As before, use the status commands to check when your job runs. + +```bash +`r config$remote$prompt` ls -t +``` + +```output +slurm-347271.out parallel-job.sh slurm-347178.out slurm-347087.out serial-job.sh amdahl README.md LICENSE.txt +``` + +```bash +`r config$remote$prompt` cat slurm-347271.out +``` + +```output +Doing 30.000 seconds of 'work' on 8 processors, +which should take 7.688 seconds with 0.850 parallel proportion of the workload. + + Hello, World!
I am process 4 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 0 of 8 on `r config$remote$node`. I will do all the serial 'work' for 4.500 seconds. + Hello, World! I am process 2 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 1 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 3 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 5 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 6 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 7 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + Hello, World! I am process 0 of 8 on `r config$remote$node`. I will do parallel 'work' for 3.188 seconds. + +Total execution time (according to rank 0): 7.697 seconds +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip b/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip deleted file mode 100644 index 19804d74..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks-jobscript.snip +++ /dev/null @@ -1,16 +0,0 @@ -``` -{{ site.remote.bash_shebang }} -{{ site.sched.comment }} {{ site.sched.flag.name }} parallel-job -{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} -{{ site.sched.comment }} -N 1 -{{ site.sched.comment }} -n 4 - -# Load the computing environment we need -# (mpi4py and numpy are in SciPy-bundle) -module load {{ site.remote.module_python3 }} -module load SciPy-bundle - -# Execute the task -mpiexec amdahl -``` -{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks.Rmd b/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks.Rmd new file mode 100644 
index 00000000..2e885505 --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/parallel/four-tasks.Rmd @@ -0,0 +1,56 @@ +```bash +`r config$remote$prompt` cp serial-job.sh parallel-job.sh +`r config$remote$prompt` nano parallel-job.sh +`r config$remote$prompt` cat parallel-job.sh +``` + +```bash +`r config$remote$shebang` +`r config$sched$comment` `r config$sched$flag$name` parallel-job +`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` +`r config$sched$comment` -N 1 +`r config$sched$comment` -n 4 + +# Load the computing environment we need +# (mpi4py and numpy are in SciPy-bundle) +module load `r config$remote$modules$python` +module load SciPy-bundle + +# Execute the task +mpiexec amdahl +``` + +Then submit your job. Note that the submission command has not really changed +from how we submitted the serial job: all the parallel settings are in the +batch file rather than the command line. + +```bash +`r config$remote$prompt` `r config$sched$submit$name` parallel-job.sh +``` + +As before, use the status commands to check when your job runs. + +```bash +`r config$remote$prompt` ls -t +``` + +```output +slurm-347178.out parallel-job.sh slurm-347087.out serial-job.sh amdahl README.md LICENSE.txt +``` + +```bash +`r config$remote$prompt` cat slurm-347178.out +``` + +```output +Doing 30.000 seconds of 'work' on 4 processors, +which should take 10.875 seconds with 0.850 parallel proportion of the workload. + + Hello, World! I am process 0 of 4 on `r config$remote$node`. I will do all the serial 'work' for 4.500 seconds. + Hello, World! I am process 2 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. + Hello, World! I am process 1 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. + Hello, World! I am process 3 of 4 on `r config$remote$node`. I will do parallel 'work' for 6.375 seconds. + Hello, World! I am process 0 of 4 on `r config$remote$node`. 
I will do parallel 'work' for 6.375 seconds. + +Total execution time (according to rank 0): 10.888 seconds +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip b/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip deleted file mode 100644 index 1941ef04..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task-jobscript.snip +++ /dev/null @@ -1,14 +0,0 @@ -``` -{{ site.remote.bash_shebang }} -{{ site.sched.comment }} {{ site.sched.flag.name }} solo-job -{{ site.sched.comment }} {{ site.sched.flag.queue }} {{ site.sched.queue.testing }} -{{ site.sched.comment }} -N 1 -{{ site.sched.comment }} -n 1 - -# Load the computing environment we need -module load {{ site.remote.module_python3 }} - -# Execute the task -amdahl -``` -{: .language-bash} diff --git a/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task.Rmd b/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task.Rmd new file mode 100644 index 00000000..ceefdfdb --- /dev/null +++ b/episodes/files/snippets/Ghastly_Mistakes/parallel/one-task.Rmd @@ -0,0 +1,22 @@ +```bash +`r config$remote$prompt` nano serial-job.sh +`r config$remote$prompt` cat serial-job.sh +``` + +```bash +`r config$remote$shebang` +`r config$sched$comment` `r config$sched$flag$name` solo-job +`r config$sched$comment` `r config$sched$flag$queue` `r config$sched$queue$testing` +`r config$sched$comment` -N 1 +`r config$sched$comment` -n 1 + +# Load the computing environment we need +module load `r config$remote$modules$python` + +# Execute the task +amdahl +``` + +```bash +`r config$remote$prompt` `r config$sched$submit$name` serial-job.sh +``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip b/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.Rmd similarity index 97% rename from episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip rename to 
episodes/files/snippets/Ghastly_Mistakes/resources/account-history.Rmd index d5a87620..5e8a53f2 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.snip +++ b/episodes/files/snippets/Ghastly_Mistakes/resources/account-history.Rmd @@ -1,4 +1,4 @@ -``` +```output JobID JobName Partition Account AllocCPUS State ExitCode ------------ ---------- ---------- ---------- ---------- ---------- -------- 7 file.sh cpubase_b+ def-spons+ 1 COMPLETED 0:0 @@ -10,5 +10,4 @@ 9 example-j+ cpubase_b+ def-spons+ 1 COMPLETED 0:0 9.batch batch def-spons+ 1 COMPLETED 0:0 9.extern extern def-spons+ 1 COMPLETED 0:0 -``` -{: .output} +``` \ No newline at end of file diff --git a/episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip b/episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip deleted file mode 100644 index 12685735..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/resources/monitor-processes-top.snip +++ /dev/null @@ -1,19 +0,0 @@ -``` -top - 21:00:19 up 3:07, 1 user, load average: 1.06, 1.05, 0.96 -Tasks: 311 total, 1 running, 222 sleeping, 0 stopped, 0 zombie -%Cpu(s): 7.2 us, 3.2 sy, 0.0 ni, 89.0 id, 0.0 wa, 0.2 hi, 0.2 si, 0.0 st -KiB Mem : 16303428 total, 8454704 free, 3194668 used, 4654056 buff/cache -KiB Swap: 8220668 total, 8220668 free, 0 used. 
11628168 avail Mem - - PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND - 1693 jeff 20 0 4270580 346944 171372 S 29.8 2.1 9:31.89 gnome-shell - 3140 jeff 20 0 3142044 928972 389716 S 27.5 5.7 13:30.29 Web Content - 3057 jeff 20 0 3115900 521368 231288 S 18.9 3.2 10:27.71 firefox - 6007 jeff 20 0 813992 112336 75592 S 4.3 0.7 0:28.25 tilix - 1742 jeff 20 0 975080 164508 130624 S 2.0 1.0 3:29.83 Xwayland - 1 root 20 0 230484 11924 7544 S 0.3 0.1 0:06.08 systemd - 68 root 20 0 0 0 0 I 0.3 0.0 0:01.25 kworker/4:1 - 2913 jeff 20 0 965620 47892 37432 S 0.3 0.3 0:11.76 code - 2 root 20 0 0 0 0 S 0.0 0.0 0:00.02 kthreadd -``` -{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip b/episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip deleted file mode 100644 index ec4c0d3f..00000000 --- a/episodes/files/snippets/Ghastly_Mistakes/resources/system-memory-free.snip +++ /dev/null @@ -1,6 +0,0 @@ -``` -total used free shared buff/cache available -Mem: 3.8G 1.5G 678M 327M 1.6G 1.6G -Swap: 3.9G 170M 3.7G -``` -{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd index 8873c7c1..9011f5b4 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/basic-job-status.Rmd @@ -1,6 +1,6 @@ ```output -JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 9 doom example-job luigi R 0:05 1 doo1 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 9 cpubase_b example- user01 R 0:05 1 node1 ``` We can see all the details of our job, most importantly that it is in the `R` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd index aa8eab85..815f3b9d 100644 --- 
a/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/job-with-name-status.Rmd @@ -1,4 +1,4 @@ ```output -JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 10 doom hello-world luigi R 0:02 1 doo1 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 10 cpubase_b hello-wo user01 R 0:02 1 node1 ``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd index 9bc993ff..5e80b164 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/option-flags-list.Rmd @@ -1,5 +1,3 @@ -> option-flag-list.Rmd is constant for all snippet libraries - * `--ntasks=` or `-n `: How many CPU cores does your job need, in total? diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd index 659cefae..21fec883 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/print-sched-variables.Rmd @@ -2,7 +2,7 @@ ## Job environment variables -When ``r config$sched$name`` runs a job, it sets a number of environment +When {{ site.sched.name }} runs a job, it sets a number of environment variables for the job. One of these will let us check what directory our job script was submitted from. The `SLURM_SUBMIT_DIR` variable is set to the directory from which our job was submitted. Using the `SLURM_SUBMIT_DIR` @@ -14,13 +14,13 @@ job was submitted. 
## Solution ```bash -`r config$remote$prompt` nano example-job.sh -`r config$remote$prompt` cat example-job.sh +{{ site.remote.prompt }} nano example-job.sh +{{ site.remote.prompt }} cat example-job.sh ``` ```bash -`r config$remote$bash_shebang` -`r config$sched$comment` -t 00:00:30 +`r site$remote$bash_shebang` +#SBATCH -t 00:00:30 echo -n "This script is running on " hostname diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd index fa1b19ea..106d9ecc 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/runtime-exceeded-output.Rmd @@ -1,5 +1,5 @@ ```output This script is running on ... -slurmstepd: error: *** JOB 12 ON doo1 CANCELLED AT 2021-02-19T13:55:57 +slurmstepd: error: *** JOB 12 ON node1 CANCELLED AT 2021-02-19T13:55:57 DUE TO TIME LIMIT *** ``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd index d5a03bfb..3cfb8c95 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/terminate-job-begin.Rmd @@ -1,6 +1,6 @@ ```output Submitted batch job 13 -JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 13 doom long_job luigi R 0:02 1 doo1 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 13 cpubase_b long_job user01 R 0:02 1 node1 ``` diff --git a/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd b/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd index 80834a8f..0a305412 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd +++ b/episodes/files/snippets/Ghastly_Mistakes/scheduler/using-nodes-interactively.Rmd @@ -1,5 +1,3 @@ -> 
using-nodes-interactively.Rmd is constant across snippets - The `r config$sched$interactive` command runs a single command on the cluster and then exits. Let's demonstrate this by running the `hostname` command with `r config$sched$interactive`. (We can cancel an `r config$sched$interactive` @@ -10,7 +8,7 @@ job with `Ctrl-c`.) ``` ```output -`r config$remote$node` +{{ config$remote.node` ``` `r config$sched$interactive` accepts all of the same options as @@ -28,7 +26,7 @@ This job will use 2 CPUs. ``` Typically, the resulting shell environment will be the same as that for -`r config$sched$submit$name`. +`r config$sched$submit.name`. ### Interactive jobs @@ -38,7 +36,7 @@ went wrong with a previous job. Fortunately, {{ config$sched$name` makes it easy to start an interactive job with `r config$sched$interactive`: ```bash -`r config$remote$prompt` `r config$sched$interactive` --pty bash +`r config$remote.prompt` `r config$sched$interactive` --pty bash ``` You should be presented with a bash prompt. 
Note that the prompt will likely diff --git a/episodes/files/snippets/Ghastly_Mistakes/transferring-files/filezilla-ssh-tunnel-instructions.snip b/episodes/files/snippets/Ghastly_Mistakes/transferring-files/filezilla-ssh-tunnel-instructions.snip deleted file mode 100644 index e69de29b..00000000 diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd index 2c48235a..8402eaa0 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/available-modules.Rmd @@ -1,5 +1,5 @@ ```bash -`r config$remote$prompt` module avail +`r config$remote$prompt` module avail | less ``` ```output diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-module-path.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-module-path.snip deleted file mode 100644 index 68e97df1..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/python-module-path.snip +++ /dev/null @@ -1,4 +0,0 @@ -``` -/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Python/3.x.y-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/SQLite/3.31.1-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/Tcl/8.6.10-GCCcore-x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/software/x86_64/amd/zen2/software/GCCcore/x.y.z/bin:/cvmfs/pilot.eessi-hpc.org/2020.12/compat/linux/x86_64/usr/bin:/opt/software/slurm/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/home/user01/.local/bin:/home/user01/bin -``` -{: .output} diff --git a/episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.Rmd similarity index 80% rename from 
episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip rename to episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.Rmd index fe107f2e..01234436 100644 --- a/episodes/files/snippets/Ghastly_Mistakes/modules/software-dependencies.snip +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/software-dependencies.Rmd @@ -1,26 +1,23 @@ To demonstrate, let's use `module list`. `module list` shows all loaded software modules. +```bash +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output Currently Loaded Modules: 1) GCCcore/x.y.z 4) GMP/6.2.0-GCCcore-x.y.z 2) Tcl/8.6.10-GCCcore-x.y.z 5) libffi/3.3-GCCcore-x.y.z 3) SQLite/3.31.1-GCCcore-x.y.z 6) Python/3.x.y-GCCcore-x.y.z ``` -{: .output} +```bash +`r config$remote$prompt` module load GROMACS +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module load GROMACS -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output Currently Loaded Modules: 1) GCCcore/x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z 2) Tcl/8.6.10-GCCcore-x.y.z 15) PMIx/3.1.5-GCCcore-x.y.z @@ -36,20 +33,18 @@ Currently Loaded Modules: 12) libevent/2.1.11-GCCcore-x.y.z 25) GROMACS/2020.1-foss-2020a-Python-3... 13) UCX/1.8.0-GCCcore-x.y.z ``` -{: .output} So in this case, loading the `GROMACS` module (a bioinformatics software package), also loaded `GMP/6.2.0-GCCcore-x.y.z` and `SciPy-bundle/2020.03-foss-2020a-Python-3.x.y` as well. Let's try unloading the `GROMACS` package. 
+```bash +`r config$remote$prompt` module unload GROMACS +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module unload GROMACS -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output Currently Loaded Modules: 1) GCCcore/x.y.z 13) UCX/1.8.0-GCCcore-x.y.z 2) Tcl/8.6.10-GCCcore-x.y.z 14) libfabric/1.11.0-GCCcore-x.y.z @@ -64,23 +59,20 @@ Currently Loaded Modules: 11) hwloc/2.2.0-GCCcore-x.y.z 23) SciPy-bundle/2020.03-foss-2020a-Py... 12) libevent/2.1.11-GCCcore-x.y.z 24) networkx/2.4-foss-2020a-Python-3.x.y ``` -{: .output} So using `module unload` "un-loads" a module, and depending on how a site is - configured it may also unload all of the dependencies (in our case it does - not). If we wanted to unload everything at once, we could run `module purge` - (unloads everything). +configured it may also unload all of the dependencies (in our case it does +not). If we wanted to unload everything at once, we could run `module purge` +(unloads everything). +```bash +`r config$remote$prompt` module purge +`r config$remote$prompt` module list ``` -{{ site.remote.prompt }} module purge -{{ site.remote.prompt }} module list -``` -{: .language-bash} -``` +```output No modules loaded ``` -{: .output} Note that `module purge` is informative. 
It will also let us know if a default set of "sticky" packages cannot be unloaded (and how to actually unload these diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/wrong-gcc-version.snip b/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/wrong-gcc-version.snip deleted file mode 100644 index 8fbd2825..00000000 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/modules/wrong-gcc-version.snip +++ /dev/null @@ -1,5 +0,0 @@ - diff --git a/episodes/files/snippets/rename-snippets.sh b/episodes/files/snippets/rename-snippets.sh deleted file mode 100755 index 85dd5711..00000000 --- a/episodes/files/snippets/rename-snippets.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash - -# This script renames snippets from the argument directory in an effort -# to make the names more meaningful, thereby simplifying the process of -# porting to a new site. - -if [[ $# != 1 ]] || [[ $1 == "-h" ]] || [[ $1 == "--help" ]]; then - echo "Please supply a folder name containing snippets for your site. This" - echo "script will rename those snippets based on the script activity. E.g.," - echo " $0 ComputeCanada_Graham_slurm" - exit 1 -fi - -if [[ $(which git) == "" ]]; then - echo "Error: This script requires git. Please install it and try again." 
- exit 1 -fi - -PREFIX=$1 - -function rename_snip { - mv $1 $2 -} - -## Episode 12: Working on a remote HPC system - -#$ {{ site.sched.info }} -rename_snip ${PREFIX}/12/info.snip ${PREFIX}/12/queue-info.snip -#$ sinfo -n {{ site.remote.node }} -o "%n %c %m" -rename_snip ${PREFIX}/12/explore.snip ${PREFIX}/12/specific-node-info.snip - -## Episode 13: Scheduling jobs - -# {{ site.sched.submit.name }} {{ site.sched.submit.options }} example-job.sh -rename_snip ${PREFIX}/13/submit_output.snip ${PREFIX}/13/basic-job-script.snip -#$ {{ site.sched.status }} {{ site.sched.flag.user }} -rename_snip ${PREFIX}/13/statu_output.snip ${PREFIX}/13/basic-job-status.snip -#$ {{ site.sched.status }} {{ site.sched.flag.user }} -rename_snip ${PREFIX}/13/statu_name_output.snip ${PREFIX}/13/job-with-name-status.snip -## The following are several key resource requests: -rename_snip ${PREFIX}/13/stat_options.snip ${PREFIX}/13/option-flags-list.snip -## Print SLURM_CPUS_PER_TASK, PBS_O_WORKDIR, or similar -rename_snip ${PREFIX}/13/env_challenge.snip ${PREFIX}/13/print-sched-variables.snip -#$ {{ site.sched.submit.name }} {{ site.sched.submit.options }} example-job.sh -rename_snip ${PREFIX}/13/long_job_cat.snip ${PREFIX}/13/runtime-exceeded-job.snip -#$ {{ site.sched.status }} {{ site.sched.flag.user }} -rename_snip ${PREFIX}/13/long_job_err.snip ${PREFIX}/13/runtime-exceeded-output.snip -#$ {{ site.sched.submit.name }} {{ site.sched.submit.options }} example-job.sh -#$ {{ site.sched.status }} {{ site.sched.flag.user }} -rename_snip ${PREFIX}/13/del_job_output1.snip ${PREFIX}/13/terminate-job-begin.snip -#$ {{site.sched.del }} 38759 -rename_snip ${PREFIX}/13/del_job_output2.snip ${PREFIX}/13/terminate-job-cancel.snip -#$ {{site.sched.del }} {{ site.sched.flag.user }} -rename_snip ${PREFIX}/13/del_multiple_challenge.snip ${PREFIX}/13/terminate-multiple-jobs.snip -## use the compute node resources interactively -rename_snip ${PREFIX}/13/interactive_example.snip 
${PREFIX}/13/using-nodes-interactively.snip - -## Episode 14: Accessing software - -#$ module avail -rename_snip ${PREFIX}/14/module_avail.snip ${PREFIX}/14/available-modules.snip -#$ which python -rename_snip ${PREFIX}/14/which_missing.snip ${PREFIX}/14/missing-python.snip -#$ module load python[3] -rename_snip ${PREFIX}/14/load_python.snip ${PREFIX}/14/module-load-python.snip -#$ which python -rename_snip ${PREFIX}/14/which_python.snip ${PREFIX}/14/python-executable-dir.snip -#$ echo $PATH -rename_snip ${PREFIX}/14/path.snip ${PREFIX}/14/python-module-path.snip -#$ ls $(dirname $(which python)) -rename_snip ${PREFIX}/14/ls_dir.snip ${PREFIX}/14/python-ls-dir-command.snip -rename_snip ${PREFIX}/14/ls_dir_output.snip ${PREFIX}/14/python-ls-dir-output.snip -## Loading & unloading software and dependencies -rename_snip ${PREFIX}/14/depend_demo.snip ${PREFIX}/14/software-dependencies.snip -## gcc example -rename_snip ${PREFIX}/14/gcc_example.snip ${PREFIX}/14/wrong-gcc-version.snip - -## Episode 15: Transferring files - -## Episode 16: Using resources effectively - -#$ {{ site.sched.hist }} -rename_snip ${PREFIX}/16/stat_output.snip ${PREFIX}/16/account-history.snip -#$ top -rename_snip ${PREFIX}/16/top_output.snip ${PREFIX}/16/monitor-processes-top.snip -#$ free -h -rename_snip ${PREFIX}/16/free_output.snip ${PREFIX}/16/system-memory-free.snip - -## Episode 17: Using shared resources responsibly diff --git a/episodes/lesson_config.yaml b/episodes/lesson_config.yaml index 39ae49a6..da2ae88d 100644 --- a/episodes/lesson_config.yaml +++ b/episodes/lesson_config.yaml @@ -1,3 +1,4 @@ --- main_config: files/snippets/Ghastly_Mistakes/_config_options.yml -snippets: Ghastly_Mistakes \ No newline at end of file +snippets: Ghastly_Mistakes +... 
diff --git a/episodes/load_config.R b/episodes/load_config.R index dce81aa6..f6506897 100644 --- a/episodes/load_config.R +++ b/episodes/load_config.R @@ -7,7 +7,7 @@ library(yaml) config <- yaml.load_file("lesson_config.yaml") ## If "main_config" key exists, load the second configuration and merge -print(paste("Loading ", config$main_config)) +### print(paste("Loading ", config$main_config)) if (!is.null(config$main_config) && file.exists(config$main_config)) { override_config <- yaml.load_file(config$main_config) config <- modifyList(config, override_config) @@ -30,16 +30,14 @@ choose_doc <- function(child_file) { fallback = file.path(current_doc, fallback_snippets, child_file) ) print(doc_paths) - print(getwd()) + ### print(getwd()) # Return the valid path, or NULL if neither exists if (file.exists(doc_paths$main)) { - print("Returning") - print(doc_paths$main) + print(paste("Returning", doc_paths$main)) return(doc_paths$main) } else if (file.exists(doc_paths$fallback)) { - print("Returning") - print(doc_paths$fallback) + print(paste("Returning", doc_paths$fallback)) return(doc_paths$fallback) } else { print("Returning NULL") diff --git a/site/README.md b/site/README.md index 42997e3d..0a00291c 100644 --- a/site/README.md +++ b/site/README.md @@ -1,2 +1,2 @@ This directory contains rendered lesson materials. Please do not edit files -here. +here. From 2bfd247c66789db240570fb410ecf6abd30608b9 Mon Sep 17 00:00:00 2001 From: Trevor Keller Date: Wed, 5 Nov 2025 16:44:35 -0500 Subject: [PATCH 7/7] Working snippetized version. 
--- episodes/13-scheduler.Rmd | 12 ++-- episodes/14-environment-variables.Rmd | 2 +- episodes/15-modules.Rmd | 2 +- episodes/epcc_config.yaml | 50 ------------- .../_config_options.yml | 2 +- episodes/lesson_config.yaml | 4 +- episodes/lesson_config_hpcc.yaml | 2 - episodes/slurm_defaults.yaml | 71 ------------------- 8 files changed, 11 insertions(+), 134 deletions(-) delete mode 100644 episodes/epcc_config.yaml delete mode 100644 episodes/lesson_config_hpcc.yaml delete mode 100644 episodes/slurm_defaults.yaml diff --git a/episodes/13-scheduler.Rmd b/episodes/13-scheduler.Rmd index d74d1b83..1d8bc45d 100644 --- a/episodes/13-scheduler.Rmd +++ b/episodes/13-scheduler.Rmd @@ -57,7 +57,7 @@ In this case, the job we want to run is a shell script -- essentially a text file containing a list of UNIX commands to be executed in a sequential manner. Our shell script will have three parts: -- On the very first line, add ``r config$remote$bash_shebang``. The `#!` +- On the very first line, add ``r config$remote$shebang``. The `#!` (pronounced "hash-bang" or "shebang") tells the computer what program is meant to process the contents of this file. In this case, we are telling it that the commands that follow are written for the command-line shell (what @@ -75,7 +75,7 @@ manner. Our shell script will have three parts: ``` ```bash -`r config$remote$bash_shebang` +`r config$remote$shebang` echo -n "This script is running on " hostname @@ -163,7 +163,7 @@ resources we must customize our job script. Comments in UNIX shell scripts (denoted by `#`) are typically ignored, but there are exceptions. For instance the special `#!` comment at the beginning of scripts specifies what program should be used to run it (you'll typically see -``r config$local$bash_shebang``). Schedulers like `r config$sched$name` also +``r config$local$shebang``). Schedulers like `r config$sched$name` also have a special comment used to denote special scheduler-specific options. 
Though these comments differ from scheduler to scheduler, `r config$sched$name`'s special comment is ``r config$sched$comment``. Anything @@ -179,7 +179,7 @@ name of a job. Add an option to the script: ``` ```bash -`r config$remote$bash_shebang` +`r config$remote$shebang` `r config$sched$comment` `r config$sched$flag$name` hello-world echo -n "This script is running on " @@ -253,7 +253,7 @@ for it on the cluster. ``` ```bash -`r config$remote$bash_shebang` +`r config$remote$shebang` `r config$sched$comment` `r config$sched$flag$time` 00:01 # timeout in HH:MM echo -n "This script is running on " @@ -282,7 +282,7 @@ wall time, and attempt to run a job for two minutes. ``` ```bash -`r config$remote$bash_shebang` +`r config$remote$shebang` `r config$sched$comment` `r config$sched$flag$name` long_job `r config$sched$comment` `r config$sched$flag$time` 00:01 # timeout in HH:MM diff --git a/episodes/14-environment-variables.Rmd b/episodes/14-environment-variables.Rmd index 3e4c6977..39bc80d3 100644 --- a/episodes/14-environment-variables.Rmd +++ b/episodes/14-environment-variables.Rmd @@ -212,7 +212,7 @@ job was submitted. ``` ```output -`r config$remote$bash_shebang` +`r config$remote$shebang` `r config$sched$comment` `r config$sched$flag$time` 00:00:30 echo -n "This script is running on " diff --git a/episodes/15-modules.Rmd b/episodes/15-modules.Rmd index fc044cfa..f02b43a7 100644 --- a/episodes/15-modules.Rmd +++ b/episodes/15-modules.Rmd @@ -204,7 +204,7 @@ compute node). 
``` ```output -`r config$remote$bash_shebang` +`r config$remote$shebang` `r config$sched$comment` `r config$sched$flag$partition` `r if (!is.null(config$sched$flag$qos)) {print(paste(config$sched$comment,config$sched$flag$qos))} `r config$sched$comment` `r config$sched$flag$time` 00:00:30 diff --git a/episodes/epcc_config.yaml b/episodes/epcc_config.yaml deleted file mode 100644 index 5425d7b1..00000000 --- a/episodes/epcc_config.yaml +++ /dev/null @@ -1,50 +0,0 @@ ---- -local: - prompt: "[user@laptop ~]$" - bash_shebang: "#!/bin/bash" - -snippets: EPCC_ARCHER2_slurm -baseurl: "https://epcced.github.io/2023-06-28-uoe-hpcintro" - -remote: - name: "ARCHER2" - host_id: "EPCC_ARCHER2" - login: "login.archer2.ac.uk" - host: "ln03" - node: "nid001053" - location: "EPCC, The University of Edinburgh" - homedir: "/home/ta114/ta114/" - user: "userid" - prompt: "userid@ln03:~>" - prompt_work: "userid@uan01:/work/ta114/ta114/userid>" - module_python3: "cray-python" - bash_shebang: "#!/bin/bash" - -sched: - name: "Slurm" - partition: "standard" - reservation: "shortqos" - qos: "short" - budget: "short" - submit: - name: "sbatch" - options: "--partition=standard --qos=short" - queue: - debug: "" - testing: "" - status: "squeue" - flag: - user: "-u userid" - interactive: "" - histdetail: "-l -j" - name: "--job-name" - time: "--time" - queue: "--partition" - nodes: "--nodes" - tasks: "" - del: "scancel" - interactive: "srun" - info: "sinfo" - comment: "#SBATCH" - hist: "sacct" - project: ta114 diff --git a/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml b/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml index 71c8b924..524b6807 100644 --- a/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml +++ b/episodes/files/snippets/HPCC_MagicCastle_slurm/_config_options.yml @@ -11,7 +11,7 @@ # Compute responsibly. 
--- -snippets: "/snippets_library/HPCC_MagicCastle_slurm" +snippets: "HPCC_MagicCastle_slurm" local: prompt: "[you@laptop:~]$" diff --git a/episodes/lesson_config.yaml b/episodes/lesson_config.yaml index da2ae88d..af0b8166 100644 --- a/episodes/lesson_config.yaml +++ b/episodes/lesson_config.yaml @@ -1,4 +1,4 @@ --- -main_config: files/snippets/Ghastly_Mistakes/_config_options.yml -snippets: Ghastly_Mistakes +main_config: files/snippets/HPCC_MagicCastle_slurm/_config_options.yml +snippets: HPCC_MagicCastle_slurm ... diff --git a/episodes/lesson_config_hpcc.yaml b/episodes/lesson_config_hpcc.yaml deleted file mode 100644 index f2044649..00000000 --- a/episodes/lesson_config_hpcc.yaml +++ /dev/null @@ -1,2 +0,0 @@ -main_snippets: "hpcc_test" -fallback_snippets: "hpcc" diff --git a/episodes/slurm_defaults.yaml b/episodes/slurm_defaults.yaml deleted file mode 100644 index 6372be9c..00000000 --- a/episodes/slurm_defaults.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# Fail-safe defaults and implicit schema for lesson configuration files ---- -snippets: slurm -baseurl: "https://ocaisa.github.io/probable-pancake/" -# main_config: "lesson_config.yaml" - -# about the Learner's laptop -local: - prompt: "[you@laptop:~]$" # command-line prompt - shebang: "#!/bin/bash" # first line of every shell script - -# about the remote/cluster environment -remote: - name: "Example Cluster" # Name of the cluster (proper noun) - login: "cluster.example.com" # domain name of the login node - host: "head" # hostname of the login node - node: "node" # hostname of a compute node - location: "SchedMD" # institutional host of the cluster - homedir: "/home" # parent of home directories - user: "userid" # stand-in for the username - prompt: "[userid@head:~]" # command-line prompt - prompt_work: "[userid@head:/work/userid]" # prompt under /work - modules: - python: "Python" # name of the module providing Python v3 - shebang: "#!/bin/bash" # first line of every shell script - -# Commands & flags for the 
scheduler environment -sched: - name: "Slurm" # proper name of the scheduler - command: - batch: "sbatch" # run later - interactive: "srun" # run now - cancel: "scancel" # don't run - queue: - test: "debug" - prod: "batch" - status: "squeue" - flag: - user: "-u userid" - interactive: "--pty bash" - histdetail: "-l -j" - name: "-J" - time: "-t" - queue: "-p" - nodes: "-N" - tasks: "-n" - del: "scancel" - interactive: - command: "srun" - info: - command: "sinfo" - comment: "#SBATCH" - hist: "sacct -u userid" - hist_filter: "" - reservation: "" - qos: "" - budget: "" - project: "" - -# submit: -# salloc: obtain a job allocation -# sbatch: submit a batch script for later execution -# srun: obtain an allocation and execute an application -# account: -# sacct: display accounting data -# manage: -# sbcast: transfer a file to a job's compute nodes -# scancel: signal jobs/steps -# squeue: view information about jobs -# sinfo: view information about nodes & partitions -# scontrol: view & modify state