From 6caa409f1d6f553f1619e72fc47525cf6a9d28ea Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Feb 2026 17:38:15 -0400 Subject: [PATCH 01/94] Fixed spelling and local rendering issue Fixed a couple of spelling mistakes and moved the location of base.yml, since the local rendering expected it under static/local --- agrf/sections/microbial.yml | 3 ++- agrf/{ => static/local}/base.yml | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename agrf/{ => static/local}/base.yml (100%) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 6a9d7d0..4d69c10 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -34,7 +34,7 @@ tabs: - title_md: What files are included? description_md: | - | Fileanme | Description | + | Filename | Description | | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **Raw Data** | | | Demultiplexed \*.FASTQ file (1 file per sample) | Contains sequencing reads for each sample | @@ -400,3 +400,4 @@ tabs: # description_md: Contact AGRF for more help with your data. 
# button_md: Contact AGRF # button_link: /request/support + diff --git a/agrf/base.yml b/agrf/static/local/base.yml similarity index 100% rename from agrf/base.yml rename to agrf/static/local/base.yml From 44dfaf5b2c592a858b6d44bf7709b59a732750f4 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Feb 2026 17:38:48 -0400 Subject: [PATCH 02/94] Added a file to gitignore Added a file to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..62a5f74 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +python-3.14.3-amd64 (1).exe From b109fcb97eeb206c819767754df61c14fb869c34 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 6 Mar 2026 16:36:47 -0400 Subject: [PATCH 03/94] Add QIIME2 PCoA tool to beta diversity section Added QIIME2 Principal Coordinates Analysis (PCoA) tool --- agrf/sections/microbial.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 4d69c10..912ca99 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -303,7 +303,7 @@ tabs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" @@ -335,8 +335,6 @@ tabs: datatypes: - qza - tsv - - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" @@ -354,7 +352,18 @@ tabs: - tsv button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" + + - title_md: qiime2 diversity pcoa - Principal coordinates analysis + description_md: | + Perform principal coordinates analysis (PCoA) on a beta diversity distance matrix to 
visualize the relationships between samples in a reduced dimensional space. + inputs: + - label: QIIME 2 Artifact file - Distance Matrix + datatypes: + - qza + + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__pcoa%2Fqiime2__diversity__pcoa" + - title_md: qiime2 diversity beta-rarefaction - Assess sequencing depth sufficiency description_md: | QIIME 2 repeatedly subsamples (rarefies) each sample’s sequence data at different depths (e.g., 1000, 2000, 3000 reads, etc.). From a725e8b247b173c5eeb58010dd070b7f32b56fb0 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 12 Mar 2026 20:47:12 -0400 Subject: [PATCH 04/94] Add QIIME2 feature-table summarize tool to microbial section Added tool entry for qiime2 feature-table summarize with inputs and Galaxy tool link. --- agrf/sections/microbial.yml | 10 ++++++++++ agrf/static/local/base.yml | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 912ca99..4fb1fa3 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -132,6 +132,16 @@ tabs: button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2_core__tools__export%2Fqiime2_core__tools__export" + + - title_md: qiime2 feature-table summarize - Summarize feature table + description_md: | + Generate a summary of a feature table, including counts per sample and feature frequency distribution. + inputs: + - label: QIIME 2 Artifact file - FeatureTable[Frequency] + datatypes: + - qza + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__summarize%2Fqiime2__feature_table__summarize" + - title_md: QIIME vizualisation extractor - Visualise .qzv files in Galaxy. description_md: | Use this tool to visualisae .qzv files within Galaxy. 
diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index d0a068b..62c23d1 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -5,7 +5,7 @@ site_name: "Australia" lab_name: AGRF Lab #this will be in caps, at right of logo #or: use the word lab, in same font as agrf logo -galaxy_base_url: https://agrf.usegalaxy.org.au +galaxy_base_url: https://usegalaxy.org.au subdomain: agrf root_domain: usegalaxy.org.au From 114ec2c32979306442dd269140d1f6857dc960c2 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 13 Mar 2026 10:28:23 -0400 Subject: [PATCH 05/94] Add Krona visualization tool Added Krona tool entry under visualization tools for interactive taxonomic visualization in microbial profiling section. --- agrf/sections/microbial.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 4fb1fa3..4fd71eb 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -389,7 +389,15 @@ tabs: button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" - + - title_md: Krona - Interactive taxonomic visualisation + description_md: | + Krona generates interactive hierarchical visualisations of taxonomic abundance. + inputs: + - label: Taxonomy classification table + datatypes: + - tsv + - biom + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fsaskia-hiltemann%2Fkrona_text%2Fkrona-text" From 56dd0b25513c5bab23764d632a24c286c61dc41c Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 13 Mar 2026 12:16:24 -0400 Subject: [PATCH 06/94] Visualise microbial abundance using heatmaps and taxonomy bar plots Heatmaps show feature abundance across samples, while taxonomy bar plots show the relative abundance of taxa in each sample. 
--- agrf/sections/microbial.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 4fd71eb..bd78dad 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -399,6 +399,32 @@ tabs: - biom button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fsaskia-hiltemann%2Fkrona_text%2Fkrona-text" + - title_md: qiime2 feature-table heatmap - Visualise feature abundance as a heatmap + description_md: | + Generate a heatmap representation of a feature table to visualise abundance patterns across samples. + inputs: + - label: QIIME 2 Artifact file - FeatureTable[Frequency] + datatypes: + - qza + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__heatmap%2Fqiime2__feature_table__heatmap" + + + + - title_md: qiime2 taxa barplot - Visualise taxonomic composition + description_md: | + Generate interactive stacked bar plots showing the relative abundance of taxa across samples. + inputs: + - label: Feature table with taxonomy + datatypes: + - qza + - label: Taxonomy assignments + datatypes: + - qza + - label: sample metadata + datatypes: + - tsv + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__taxa__barplot%2Fqiime2__taxa__barplot" + - id: tutorials From 5681040b1ad0cfa119b3c5485aa825b9cfcfe91a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 13 Mar 2026 12:44:12 -0400 Subject: [PATCH 07/94] added picrust2(functional annotation Added PICRUSt2 tool for predicting microbial functions in the microbial tools section. 
--- agrf/sections/microbial.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index bd78dad..6361846 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -389,6 +389,15 @@ tabs: button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" + - title_md: PICRUSt2 metagenome prediction - Predict microbial functions + description_md: | + Predict microbial functional profiles such as gene families and metabolic pathways from microbial community data. + inputs: + - label: Sequence abundance table (OTUs or ASVs) + datatypes: + - biom + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fpicrust2_metagenome_pipeline%2Fpicrust2_metagenome_pipeline" + - title_md: Krona - Interactive taxonomic visualisation description_md: | Krona generates interactive hierarchical visualisations of taxonomic abundance. From 090d6f3949d4b2fcbbcd2334e91bdd64fed99a7b Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 13 Mar 2026 13:47:32 -0400 Subject: [PATCH 08/94] Add metadata filtering tool Added qiime2 feature-table filter-samples tool to enable metadata-based filtering (e.g., group exclusion and outlier removal) in the PacBio 16S workflow. --- agrf/sections/microbial.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 6361846..ae214be 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -164,6 +164,19 @@ tabs: view_tip: View in QIIME2 + - title_md: qiime2 feature-table filter-samples - Filter samples using metadata + description_md: | + Remove samples from a feature table based on sample metadata. + This can be used to exclude groups or remove outlier samples before downstream analysis. 
+ inputs: + - label: QIIME 2 Artifact file - FeatureTable[Frequency] + datatypes: + - qza + - label: sample metadata + datatypes: + - tsv + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__filter_samples%2Fqiime2__feature_table__filter_samples" + - id: alpha_diversity title: Alpha Diversity From c4a13b5d4d7ebe6b4adcce3cca20cfd1efed0256 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 13 Mar 2026 14:39:14 -0400 Subject: [PATCH 09/94] Add RNASeq section and update intro grid layout Added RNASeq workflow section to the AGRF lab page. Updated intro.md grid layout to include RNASeq navigation button. Updated base.yml to load rnaseq.yml section. --- agrf/static/local/base.yml | 1 + agrf/templates/intro.md | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index 62c23d1..0ba1fbf 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -26,6 +26,7 @@ sections: - sections/data.yml - sections/qualitycontrol.yml - sections/microbial.yml + - sections/rnaseq.yml # - sections/moreanalysis.yml - sections/learn.yml # - sections/help.yml diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index 38dd504..85dcf19 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -68,14 +68,17 @@ then add Section, so data becomes dataSection - +
+ +
+ From e716ae921c7cc74effb1ba4aa29a999aaaba47c0 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 13 Mar 2026 15:21:40 -0400 Subject: [PATCH 10/94] Updated gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 62a5f74..921816b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ python-3.14.3-amd64 (1).exe +venv +venv311 +.DS_Stores +agrf/base.yml \ No newline at end of file From b6fd5d15ab8cec5f099aa180d2fa2dc833be2e01 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Wed, 18 Mar 2026 18:16:36 -0400 Subject: [PATCH 11/94] Add RNASeq section Added rnaseq.yml with initial structure including overview and tools sections. Includes alignment, differential expression, and visualisation tools. Further details on input/output files will be added. --- agrf/sections/rnaseq.yml | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 agrf/sections/rnaseq.yml diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml new file mode 100644 index 0000000..f47b335 --- /dev/null +++ b/agrf/sections/rnaseq.yml @@ -0,0 +1,52 @@ +id: rnaseq +title: RNASeq +tabs: + - id: overview + title: Overview + heading_md: + content: + - title_md: RNASeq workflow + description_md: | + RNA sequencing (RNASeq) measures gene expression levels across samples. + The workflow includes alignment, filtering, normalization, + differential expression analysis and visualization. + + - id: tools + title: Tools + content: + subsections: + + - id: alignment + title: Alignment + content: + - title_md: STAR - Align reads to reference + description_md: | + Align RNASeq reads to a reference genome using STAR. + button_md: Run STAR + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Frgrnastar%2Frna_star" + + - id: analysis + title: Differential expression + content: + - title_md: edgeR + description_md: | + Differential gene expression analysis. 
+ + - title_md: DESeq2 + description_md: | + Identify differentially expressed genes between groups. + + - id: visualisation + title: Visualisation + content: + - title_md: PCA + description_md: | + Explore sample clustering. + + - title_md: Heatmap + description_md: | + Display gene expression patterns. + + - title_md: Volcano plot + description_md: | + Visualise differential expression results. \ No newline at end of file From f4f03ed56b98f5079ec3b7fc060ef480fbc05380 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 11:47:12 -0400 Subject: [PATCH 12/94] Add RNASeq tools section with QC and alignment Added RNASeq tools including quality control (FastQC, MultiQC) and alignment (STAR). --- agrf/sections/rnaseq.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index f47b335..27c572d 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -15,7 +15,18 @@ tabs: title: Tools content: subsections: + + - id: qc + title: Quality control + content: + - title_md: FastQC + description_md: | + Assess raw RNASeq read quality including base quality scores, GC content and adapter contamination. + - title_md: MultiQC + description_md: | + Aggregate QC reports across multiple samples into a single summary report. + - id: alignment title: Alignment content: From 063ec75a727a5aa401d4f9081134ce5209046826 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 12:03:19 -0400 Subject: [PATCH 13/94] Add RNASeq gene quantification step (featureCounts) Added gene quantification step using featureCounts to convert aligned reads into count matrices for downstream differential expression analysis. 
--- agrf/sections/rnaseq.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 27c572d..39ec107 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -35,6 +35,24 @@ tabs: Align RNASeq reads to a reference genome using STAR. button_md: Run STAR button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Frgrnastar%2Frna_star" + + - id: quantification + title: Gene quantification + content: + - title_md: featureCounts - Count reads per gene + description_md: | + Convert aligned reads (BAM files) into a gene count matrix required for downstream differential expression analysis. + + inputs: + - label: Aligned reads (BAM) + datatypes: + - bam + - label: Gene annotation file + datatypes: + - gtf + - gff + button_md: Run featureCounts + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" - id: analysis title: Differential expression From 9f3c9d84687e5820d0c65343a630de0d090331e3 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 12:26:59 -0400 Subject: [PATCH 14/94] Add RNASeq filtering step Added filtering section including sample filtering and low count gene filtering to improve downstream differential expression analysis. --- agrf/sections/rnaseq.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 39ec107..34a53e0 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -54,6 +54,16 @@ tabs: button_md: Run featureCounts button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" + - id: filtering + title: Filtering + content: + - title_md: Sample filtering + description_md: | + Remove outlier samples or unwanted groups before analysis. 
+ - title_md: Filter lowly expressed genes + description_md: | + Remove genes with low counts across samples to improve statistical power in differential expression analysis. + - id: analysis title: Differential expression content: From 0673b174133b2c8e9b50605f3cfc97a7f16dabe2 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 12:42:39 -0400 Subject: [PATCH 15/94] Add normalization step to RNASeq workflow Added normalization section to explain adjustment of sequencing depth and library size. --- agrf/sections/rnaseq.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 34a53e0..a7b5ce4 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -63,6 +63,13 @@ tabs: - title_md: Filter lowly expressed genes description_md: | Remove genes with low counts across samples to improve statistical power in differential expression analysis. + + - id: normalization + title: Normalization + content: + - title_md: Normalization + description_md: | + Adjusts for differences in sequencing depth and library size between samples to allow accurate comparison of gene expression. - id: analysis title: Differential expression From 08e1600d9192c66e32cf69b2972138769bb5d2c8 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 13:00:46 -0400 Subject: [PATCH 16/94] correction very few corrections --- agrf/sections/rnaseq.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index a7b5ce4..e154882 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -60,7 +60,7 @@ tabs: - title_md: Sample filtering description_md: | Remove outlier samples or unwanted groups before analysis. - - title_md: Filter lowly expressed genes + - title_md: Filter lowly expressed genes description_md: | Remove genes with low counts across samples to improve statistical power in differential expression analysis. 
@@ -91,8 +91,8 @@ tabs: - title_md: Heatmap description_md: | - Display gene expression patterns. + Display gene expression patterns across samples. - title_md: Volcano plot description_md: | - Visualise differential expression results. \ No newline at end of file + Visualise differential expression results (log fold change vs significance). \ No newline at end of file From ba2a861cdf430b61d829a0ff934caf6502ce177f Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 14:17:01 -0400 Subject: [PATCH 17/94] Add input details for STAR alignment tool Added input file details (FASTQ, FASTA, GTF) for STAR alignment step in RNASeq workflow. --- agrf/sections/rnaseq.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index e154882..46c103a 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -33,6 +33,17 @@ tabs: - title_md: STAR - Align reads to reference description_md: | Align RNASeq reads to a reference genome using STAR. + + inputs: + - label: RNASeq reads (FASTQ) + datatypes: + - fastq + - label: Reference genome (fasta) + datatypes: + - fasta + - label: Gene annotation file (GTF) + datatypes: + - gtf button_md: Run STAR button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Frgrnastar%2Frna_star" From 7fdeaa652137714ae257cb422c0b60175781879a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 14:25:50 -0400 Subject: [PATCH 18/94] Add edgeR and DESeq2 inputs Added required inputs for differential expression analysis tools. --- agrf/sections/rnaseq.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 46c103a..30dc5e0 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -88,10 +88,24 @@ tabs: - title_md: edgeR description_md: | Differential gene expression analysis. 
+ inputs: + - label: Gene count matrix + datatypes: + - tsv + - label: Sample metadata + datatypes: + - tsv - title_md: DESeq2 description_md: | Identify differentially expressed genes between groups. + inputs: + - label: Gene count matrix + datatypes: + - tsv + - label: Sample metadata + datatypes: + - tsv - id: visualisation title: Visualisation From 15819b2c92c9c63f74e23dedbf87ee6f391c72a9 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 19 Mar 2026 14:51:37 -0400 Subject: [PATCH 19/94] Add Metagenomics section to lab Created metagenomics.yml and added initial assembly tools (MEGAHIT, metaSPAdes/meta-hifiasm). Linked section in base.yml and homepage. --- agrf/sections/metagenomics.yml | 27 +++++++++++++++++++++++++++ agrf/static/local/base.yml | 1 + agrf/templates/intro.md | 3 +++ 3 files changed, 31 insertions(+) create mode 100644 agrf/sections/metagenomics.yml diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml new file mode 100644 index 0000000..fa998b4 --- /dev/null +++ b/agrf/sections/metagenomics.yml @@ -0,0 +1,27 @@ +id: metagenomics +title: Metagenomics + +tabs: + - id: overview + title: Overview + content: + - title_md: Metagenomics workflow + description_md: | + Metagenomics analysis involves assembly, binning, taxonomic classification, + and functional annotation of microbial communities. + + - id: tools + title: Tools + content: + subsections: + + - id: assembly + title: Assembly + content: + - title_md: MEGAHIT + description_md: | + Assemble short reads into contigs for metagenomic analysis. + + - title_md: metaSPAdes / meta-hifiasm + description_md: | + Assemble long-read or hybrid metagenomic data. 
\ No newline at end of file diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index 0ba1fbf..1bf6fe1 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -27,6 +27,7 @@ sections: - sections/qualitycontrol.yml - sections/microbial.yml - sections/rnaseq.yml + - sections/metagenomics.yml # - sections/moreanalysis.yml - sections/learn.yml # - sections/help.yml diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index 85dcf19..60ed3b8 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -79,6 +79,9 @@ then add Section, so data becomes dataSection + From 725a72648f434dbaf46c769d860a4dda95561af5 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 20 Mar 2026 11:24:24 -0400 Subject: [PATCH 20/94] Add assembly + QC Added metagenomics workflow including assembly and assembly QC. --- agrf/sections/metagenomics.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index fa998b4..ffb830a 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -24,4 +24,11 @@ tabs: - title_md: metaSPAdes / meta-hifiasm description_md: | - Assemble long-read or hybrid metagenomic data. \ No newline at end of file + Assemble long-read or hybrid metagenomic data. + - id: assembly_qc + title: Assembly quality control + content: + - title_md: QUAST + description_md: | + Assess the quality of assembled contigs, including length, completeness and fragmentation. + From 5d400483478cd9ba949b0ea93ce8bcfa4910f9fc Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 20 Mar 2026 11:56:45 -0400 Subject: [PATCH 21/94] Add binning to metagenomics workflow Added MetaBAT binning with inputs. 
--- agrf/sections/metagenomics.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index ffb830a..c873ae6 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -31,4 +31,17 @@ tabs: - title_md: QUAST description_md: | Assess the quality of assembled contigs, including length, completeness and fragmentation. + - id: binning + title: Binning + content: + - title_md: MetaBAT2 + description_md: | + Group assembled contigs into genome bins representing individual microbial genomes. + inputs: + - label: Assembled contigs (FASTA) + datatypes: + - fasta + - label: BAM files (mapped reads) + datatypes: + - bam From 343fb9318bf204d1b4759366dd2905747d867d6a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 20 Mar 2026 12:55:56 -0400 Subject: [PATCH 22/94] Add binning to metagenomics workflow Added MetaBAT for binning contigs into genome bins based on coverage and composition. --- agrf/sections/metagenomics.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index c873ae6..c2fdcdb 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -45,3 +45,14 @@ tabs: datatypes: - bam + - id: binning_qc + title: Binning quality control + content: + - title_md: CheckM2 + description_md: | + Assess genome bin quality by estimating completeness and contamination. + + inputs: + - label: Genome bins + datatypes: + - fasta \ No newline at end of file From 90882eff22d6db33c848b30d1e09d0989c9ff53f Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 20 Mar 2026 13:02:40 -0400 Subject: [PATCH 23/94] Add taxonomic classification to metagenomics workflow Added GTDB-Tk for assigning taxonomy to genome bins. 
--- agrf/sections/metagenomics.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index c2fdcdb..2ee215b 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -55,4 +55,15 @@ tabs: inputs: - label: Genome bins datatypes: - - fasta \ No newline at end of file + - fasta + + - id: taxonomy + title: Taxonomic classification + content: + - title_md: GTDB-Tk + description_md: | + Assign taxonomy to genome bins using the Genome Taxonomy Database. + inputs: + - label: Genome bins + datatypes: + - fasta \ No newline at end of file From 829ebc9ac4f9a085eeabbfd73e1adf7ba880645b Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 20 Mar 2026 14:37:51 -0400 Subject: [PATCH 24/94] Complete metagenomics workflow with inputs, outputs and tool links Added assembly, QC, binning, binning QC, and taxonomy steps with inputs, outputs, and Galaxy tool links. --- agrf/sections/metagenomics.yml | 40 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 2ee215b..5f46801 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -21,6 +21,14 @@ tabs: - title_md: MEGAHIT description_md: | Assemble short reads into contigs for metagenomic analysis. + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastq + outputs: + - label: Assembled contigs (FASTA) + button_md: Run MEGAHIT + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmegahit%2Fmegahit" - title_md: metaSPAdes / meta-hifiasm description_md: | @@ -31,6 +39,15 @@ tabs: - title_md: QUAST description_md: | Assess the quality of assembled contigs, including length, completeness and fragmentation. 
+ inputs: + - label: Assembled contigs (FASTA) + datatypes: + - fasta + outputs: + - label: Assembly quality report + button_md: Run QUAST + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fquast%2Fquast" + - id: binning title: Binning content: @@ -44,6 +61,10 @@ tabs: - label: BAM files (mapped reads) datatypes: - bam + outputs: + - label: Genome bins (FASTA) + button_md: Run MetaBAT2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmetabat2%2Fmetabat2" - id: binning_qc title: Binning quality control @@ -52,10 +73,15 @@ tabs: description_md: | Assess genome bin quality by estimating completeness and contamination. - inputs: - - label: Genome bins - datatypes: - - fasta + inputs: + - label: Genome bins (FASTA) + datatypes: + - fasta + outputs: + - label: Completeness and contamination report + button_md: Run CheckM2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fcheckm2%2Fcheckm2" + - id: taxonomy title: Taxonomic classification @@ -66,4 +92,8 @@ tabs: inputs: - label: Genome bins datatypes: - - fasta \ No newline at end of file + - fasta + outputs: + - label: Taxonomic classification (TSV) + button_md: Run GTDB-Tk + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fgtdbtk_classify_wf%2Fgtdbtk_classify_wf" \ No newline at end of file From 74f0c0f466b1690a9e930beb9b6feb4f007560b4 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 26 Mar 2026 11:31:33 -0400 Subject: [PATCH 25/94] Updated RNA-seq section with tool links and inputs. Added Galaxy tool links (FastQC, MultiQC, STAR, featureCounts, edgeR, DESeq2) and defined inputs for core analysis steps. 
--- agrf/old_base.yml | 0 agrf/sections/rnaseq.yml | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 agrf/old_base.yml diff --git a/agrf/old_base.yml b/agrf/old_base.yml new file mode 100644 index 0000000..e69de29 diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 30dc5e0..cf0c762 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -22,10 +22,25 @@ tabs: - title_md: FastQC description_md: | Assess raw RNASeq read quality including base quality scores, GC content and adapter contamination. + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastq + button_md: Run FastQC + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" - title_md: MultiQC description_md: | Aggregate QC reports across multiple samples into a single summary report. + inputs: + - label: FastQC reports + datatypes: + - html + - zip + outputs: + - label: Summary QC report (HTML) + button_md: Run MultiQC + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmultiqc%2Fmultiqc" - id: alignment title: Alignment @@ -44,6 +59,8 @@ tabs: - label: Gene annotation file (GTF) datatypes: - gtf + outputs: + - label: Aligned reads (BAM) button_md: Run STAR button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Frgrnastar%2Frna_star" @@ -62,6 +79,8 @@ tabs: datatypes: - gtf - gff + outputs: + - label: Gene count matrix (TSV) button_md: Run featureCounts button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" @@ -95,6 +114,10 @@ tabs: - label: Sample metadata datatypes: - tsv + outputs: + - label: Differential expression results (TSV) + button_md: Run edgeR + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fedger%2Fedger" - title_md: DESeq2 description_md: | @@ -106,6 +129,8 @@ tabs: - label: Sample metadata datatypes: - tsv + 
button_md: Run DESeq2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fdeseq2%2Fdeseq2" - id: visualisation title: Visualisation From 135b3eedd754b8a632468cb80e2b0b09bd5bb192 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 26 Mar 2026 12:28:39 -0400 Subject: [PATCH 26/94] Refined RNA-seq workflow and fixed visualization section. Updated RNA-seq workflow by removing PCA tool link and refining visualization steps. Ensured correct inputs, outputs, and tool links for all sections. --- agrf/sections/rnaseq.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index cf0c762..a0911e2 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -129,6 +129,8 @@ tabs: - label: Sample metadata datatypes: - tsv + outputs: + - label: Differential expression results (TSV) button_md: Run DESeq2 button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fdeseq2%2Fdeseq2" @@ -137,12 +139,24 @@ tabs: content: - title_md: PCA description_md: | - Explore sample clustering. - + Explore sample clustering using principal component analysis. + - title_md: Heatmap description_md: | Display gene expression patterns across samples. + inputs: + - label: Expression matrix + datatypes: + - tsv + button_md: Run Heatmap + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fggplot2_heatmap2%2Fggplot2_heatmap2" - title_md: Volcano plot description_md: | - Visualise differential expression results (log fold change vs significance). \ No newline at end of file + Visualise differential expression results (log fold change vs significance). 
+ inputs: + - label: Differential expression results (TSV) + datatypes: + - tsv + button_md: Run Volcano plot + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fvolcanoplot%2Fvolcanoplot" \ No newline at end of file From cd5700cd68e424f795a66609df5e88f899806360 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 26 Mar 2026 14:51:27 -0400 Subject: [PATCH 27/94] Refine metagenomics workflow and add inputs Updated metagenomics workflow by refining descriptions, adding inputs , and improving tool clarity. Ensured alignment with standard assembly, binning, QC, and taxonomy steps. --- agrf/sections/metagenomics.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 5f46801..74e7826 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -7,8 +7,8 @@ tabs: content: - title_md: Metagenomics workflow description_md: | - Metagenomics analysis involves assembly, binning, taxonomic classification, - and functional annotation of microbial communities. + Metagenomics analysis involves assembly, binning, quality assessment, and taxonomic classification + of microbial communities. - id: tools title: Tools @@ -32,7 +32,12 @@ tabs: - title_md: metaSPAdes / meta-hifiasm description_md: | - Assemble long-read or hybrid metagenomic data. + Assemble long-read or hybrid metagenomic data (e.g., PacBio). + inputs: + - label: Long reads (FASTQ) + datatypes: + - fastq + - id: assembly_qc title: Assembly quality control content: From 1a7ef45e7e14f53843d296bf2bbbb7982d3863cd Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 26 Mar 2026 15:36:39 -0400 Subject: [PATCH 28/94] Refine microbial workflow and fix tool links Updated microbial profiling workflow by correcting tool links, fixing beta diversity steps, and improving overall structure and clarity. 
--- agrf/sections/microbial.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index ae214be..48f79c4 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -231,7 +231,7 @@ tabs: - title_md: qiime2 diversity alpha-phylogenetic - Calculate alpha diversity (with a phylogenetic tree) description_md: | - Phylogenetic alpha diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included om AGRF's analysis) is: + Phylogenetic alpha diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included in AGRF's analysis) is: * Faith's Phylogenetic Distance @@ -243,8 +243,7 @@ tabs: - qza button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" - button_link: /request/vcs - + - title_md: qiime2 diversity alpha-correlation - Correlate alpha diversity with sample metadata description_md: | Determine whether numeric sample metadata columns are correlated with alpha diversity. 
@@ -327,7 +326,7 @@ tabs: datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta%2Fqiime2__diversity__beta" - title_md: qiime2 diversity beta-phylogenetic - Calculate beta diversity (with a phylogenetic tree) @@ -343,9 +342,8 @@ tabs: datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" - button_link: /request/vcs - + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_phylogenetic%2Fqiime2__diversity__beta_phylogenetic" + - title_md: qiime2 diversity beta-correlation - Correlate beta diversity with sample metadata description_md: | Determine whether numeric sample metadata columns are correlated with beta diversity. 
@@ -359,7 +357,7 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_correlation%2Fqiime2__diversity__beta_correlation" - title_md: qiime2 diversity beta-group-significance - Correlate beta diversity with groups in sample metadata description_md: | @@ -374,7 +372,7 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_group_significance%2Fqiime2__diversity__beta_group_significance" - title_md: qiime2 diversity pcoa - Principal coordinates analysis description_md: | @@ -399,7 +397,7 @@ tabs: datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_rarefaction%2Fqiime2__diversity__beta_rarefaction" - title_md: PICRUSt2 metagenome prediction - Predict microbial functions From 0501ddf81c04d1b3a569d1b52c287402e613ccba Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 11:24:30 -0400 Subject: [PATCH 29/94] Add microbial workflow with diversity analysis and tools Implemented microbial (16S) workflow including alpha and beta diversity analysis, ANCOM for differential abundance, PICRUSt2 for functional prediction, and visualization tools such as Krona, heatmaps, and taxonomy barplots. 
--- agrf/sections/microbial.yml | 90 +++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 48f79c4..9989bf5 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -96,9 +96,9 @@ tabs: title: Tools content: subsections: - - id: QIIME2_formats - title: Working with QIIME 2 files - content: + - id: QIIME2_formats + title: Working with QIIME 2 files + content: - title_md: Details description_md: | @@ -132,7 +132,6 @@ tabs: button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2_core__tools__export%2Fqiime2_core__tools__export" - - title_md: qiime2 feature-table summarize - Summarize feature table description_md: | Generate a summary of a feature table, including counts per sample and feature frequency distribution. @@ -166,8 +165,8 @@ tabs: - title_md: qiime2 feature-table filter-samples - Filter samples using metadata description_md: | - Remove samples from a feature table based on sample metadata. - This can be used to exclude groups or remove outlier samples before downstream analysis. + Filter samples using metadata to remove outliers or exclude specific groups before analysis. + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: @@ -177,14 +176,14 @@ tabs: - tsv button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__filter_samples%2Fqiime2__feature_table__filter_samples" - - id: alpha_diversity - title: Alpha Diversity + - id: alpha_diversity + title: Alpha Diversity - content: + content: - - title_md: Details - description_md: | - Alpha diversity measures the diversity *within* a single sample. There are a number of different metrics used. AGRF's analysis includes four alpha diversity metrics (stored within separate .qza files). 
+ - title_md: Details + description_md: | + Alpha diversity measures the diversity *within* a single sample. There are a number of different metrics used. AGRF's analysis includes four alpha diversity metrics (stored within separate .qza files). - `observed_features_vector.qza` - Sample richness per sample. A count of the number of features (i.e. species) observed per sample. - `shannon_vector.qza` - Shannon entropy (i.e. Shannon index) for each sample. This is a quantitative measure of community richness (number of species present) and evenness. Specifically, it quantifies the uncertainty in predicting the species of an individual microbe (or effectively a read) taken at random from the sample. @@ -194,14 +193,14 @@ tabs: Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export` tool. The .tsv contains two columns: [sample name.fastq] , [alpha diversity metric] - inputs: + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - - title_md: qiime2 diversity alpha - Calculate alpha diversity (non-phylogenetic) - description_md: | + - title_md: qiime2 diversity alpha - Calculate alpha diversity (non-phylogenetic) + description_md: | Non-phylogenetic alpha diversity metrics provide a general overview of diversity based on counts or proportions. Common examples (included in AGRF's analysis) are: * Observed features (richness) @@ -210,12 +209,12 @@ tabs: Use this tool to calculate other non-phylogenetic alpha diversity metrics. 
- inputs: + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - buttons: + buttons: - icon: run link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" tip: QIIME2 - Alpha diversity @@ -226,29 +225,29 @@ tabs: link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" tip: QIIME2 - Alpha diversity (phlyogenetic) - #button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" - - title_md: qiime2 diversity alpha-phylogenetic - Calculate alpha diversity (with a phylogenetic tree) - description_md: | + - title_md: qiime2 diversity alpha-phylogenetic - Calculate alpha diversity (with a phylogenetic tree) + description_md: | Phylogenetic alpha diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included in AGRF's analysis) is: * Faith's Phylogenetic Distance Use this tool to calculate other phylogenetic alpha diversity metrics. 
- inputs: + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" - - title_md: qiime2 diversity alpha-correlation - Correlate alpha diversity with sample metadata - description_md: | + - title_md: qiime2 diversity alpha-correlation - Correlate alpha diversity with sample metadata + description_md: | Determine whether numeric sample metadata columns are correlated with alpha diversity. - inputs: + inputs: - label: QIIME 2 Artifact file - Alpha Diversity datatypes: - qza @@ -257,13 +256,13 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" - - title_md: qiime2 diversity alpha-group-significance - Correlate alpha diversity with groups in sample metadata - description_md: | + - title_md: qiime2 diversity alpha-group-significance - Correlate alpha diversity with groups in sample metadata + description_md: | Visually and statistically compare groups of alpha diversity values. 
- inputs: + inputs: - label: QIIME 2 Artifact file - Alpha Diversity datatypes: - qza @@ -272,26 +271,26 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" - - title_md: qiime2 diversity alpha-rarefaction - Assess sequencing depth sufficiency - description_md: | + - title_md: qiime2 diversity alpha-rarefaction - Assess sequencing depth sufficiency + description_md: | QIIME 2 repeatedly subsamples (rarefies) each sample’s sequence data at different depths (e.g., 1000, 2000, 3000 reads, etc.). For each depth, it calculates an alpha diversity metric (e.g., Shannon index, Faith's PD). It does this multiple times per depth to account for random variation (controlled by the --p-iterations parameter). The result is a curve for each sample showing diversity vs. sampling effort. - inputs: + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" - - id: beta_diversity - title: Beta Diversity + - id: beta_diversity + title: Beta Diversity - content: + content: - title_md: Details description_md: | @@ -302,7 +301,7 @@ tabs: - `evenness_vector.qza` - Pielous evenness index for each sample. A measure of how close in numbers (sequence counts) each species in a sample is. It is the ratio of the Shannon index to the maximum possible Shannon index if every species was equally likely. Value between 0 and 1. 
The closer to 1 the more even. - `faith_pd_vector.qza` - Faiths phylogenetic distance. A phylogenetically aware alpha diversity metric. Equal to the sum of all branch lengths of the phylogenetic tree that spans all members of the sample. The higher the number the greater the diversity. - Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export' tool. The .tsv contains two columns: `[sample name.fastq]` , `[alpha diversity metric]` + Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export` tool. The .tsv contains two columns: `[sample name.fastq]` , `[alpha diversity metric]` inputs: @@ -331,7 +330,7 @@ tabs: - title_md: qiime2 diversity beta-phylogenetic - Calculate beta diversity (with a phylogenetic tree) description_md: | - Phylogenetic beta diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included om AGRF's analysis) is: + Phylogenetic beta diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included in AGRF's analysis) is: * Faith's Phylogenetic Distance @@ -399,7 +398,20 @@ tabs: button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_rarefaction%2Fqiime2__diversity__beta_rarefaction" + - title_md: qiime2 composition ancom - Differential abundance of taxa + description_md: | + Identify taxa that are differentially abundant between groups using ANCOM. 
+ inputs: + - label: Feature Table + datatypes: + - qza + - label: sample metadata + datatypes: + - tsv + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__composition__ancom%2Fqiime2__composition__ancom" + + - title_md: PICRUSt2 metagenome prediction - Predict microbial functions description_md: | Predict microbial functional profiles such as gene families and metabolic pathways from microbial community data. From ca4202940ac18d3c09a641dabd918291c3837fc7 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 12:33:53 -0400 Subject: [PATCH 30/94] added trim galore Removes adapters, low-quality base and produces clean FASTQ reads for alignment --- agrf/sections/rnaseq.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index a0911e2..3728968 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -41,7 +41,18 @@ tabs: - label: Summary QC report (HTML) button_md: Run MultiQC button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmultiqc%2Fmultiqc" - + + - title_md: Trim Galore - Trim adapters and low-quality bases + description_md: | + Remove adapter sequences and low-quality bases from RNASeq reads before alignment. + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastq + outputs: + - label: Trimmed reads (FASTQ) + button_md: Run Trim Galore + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbgruening%2Ftrim_galore%2Ftrim_galore" - id: alignment title: Alignment content: From 2cff7ffb9e346f3e77b4d40e6313a1f24aeae61a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 12:52:01 -0400 Subject: [PATCH 31/94] Added Sortmerna to remove rRNA reads. Filters out ribosomal RNA to retain clean reads for alignment. 
--- agrf/sections/rnaseq.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 3728968..057a2a3 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -53,6 +53,20 @@ tabs: - label: Trimmed reads (FASTQ) button_md: Run Trim Galore button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbgruening%2Ftrim_galore%2Ftrim_galore" + + - title_md: sortmerna + description_md: | + Remove rRNA reads from RNASeq data. + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastq + outputs: + - label: Filtered reads (FASTQ) + button_md: Run sortmerna + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Frnateam%2Fsortmerna%2Fbg_sortmerna" + + - id: alignment title: Alignment content: From c792cceb1d5413209a8b7fac696a8a2d8cdaf61d Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 13:06:03 -0400 Subject: [PATCH 32/94] Added Samtools Sort Sorts BAM files, organizes aligned reads by genomic position for downstream analysis. --- agrf/sections/rnaseq.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 057a2a3..1b20c1e 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -66,7 +66,7 @@ tabs: button_md: Run sortmerna button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Frnateam%2Fsortmerna%2Fbg_sortmerna" - + - id: alignment title: Alignment content: @@ -88,6 +88,22 @@ tabs: - label: Aligned reads (BAM) button_md: Run STAR button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Frgrnastar%2Frna_star" + + - id: post_alignment + title: BAM processing + content: + - title_md: samtools sort + description_md: | + Sort aligned reads (BAM) by genomic coordinates. 
+ + inputs: + - label: Aligned reads (BAM) + datatypes: + - bam + outputs: + - label: Sorted BAM + button_md: Run samtools sort + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Fsamtools_sort%2Fsamtools_sort" - id: quantification title: Gene quantification From 2ddb08768fb7445c31bc8b90d11adf3247572702 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 13:10:11 -0400 Subject: [PATCH 33/94] Added StringTie Assembles transcripts from RNA-seq data, estimates gene expression from aligned reads. --- agrf/sections/rnaseq.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 1b20c1e..3110588 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -125,6 +125,21 @@ tabs: button_md: Run featureCounts button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" + - title_md: StringTie + description_md: | + Assemble transcripts and estimate gene expression from aligned RNASeq reads. + inputs: + - label: Aligned reads (BAM) + datatypes: + - bam + - label: Reference annotation (GTF) + datatypes: + - gtf + outputs: + - label: Transcript assembly (GTF) + button_md: Run StringTie + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstringtie%2Fstringtie" + - id: filtering title: Filtering content: From 328692b7377fcbc64e71f0b27317f2f6c072ee2d Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 15:33:44 -0400 Subject: [PATCH 34/94] GBS analysis using ustacks, cstacks, sstacks, tsv2bam, gstacks, and populations. This workflow processes sequencing data to build loci, create a catalog, match samples, convert data, call SNPs, and generate population statistics. 
--- agrf/sections/moreanalysis.yml | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/agrf/sections/moreanalysis.yml b/agrf/sections/moreanalysis.yml index c3925e3..32de14d 100644 --- a/agrf/sections/moreanalysis.yml +++ b/agrf/sections/moreanalysis.yml @@ -56,3 +56,43 @@ tabs: description_md: This tool does xyz - title_md: WF1 description_md: This wf does xyz + + - id: gbs + title: GBS + heading_md: > + content: + - title_md: ustacks + description_md: > + Build loci for each sample from sequencing reads. + button_md: Run ustacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_ustacks%2Fstacks_ustacks" + + - title_md: cstacks + description_md: > + Create a catalog of loci from multiple samples. + button_md: Run cstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_cstacks%2Fstacks_cstacks" + . + - title_md: sstacks + description_md: > + Match individual samples to the catalog of loci. + button_md: Run sstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" + + - title_md: tsv2bam + description_md: > + Convert stacks output into BAM format for downstream analysis. + button_md: Run tsv2bam + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_tsv2bam%2Fstacks2_tsv2bam" + + - title_md: gstacks + description_md: > + Assemble loci and call SNPs across all samples. + button_md: Run gstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks" + + - title_md: populations + description_md: > + Calculate population-level statistics and export variant data. 
+ button_md: Run populations + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" \ No newline at end of file From 8d53eb11f09611ef2691735861f8289464e380fb Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 27 Mar 2026 15:58:00 -0400 Subject: [PATCH 35/94] Added tutorial section for RNASeq with transcriptomics GTN link. Added a tutorial section in the RNASeq module with a link to Galaxy Training Network transcriptomics resources. This provides users with access to RNASeq tutorials and learning materials for analysis workflows. --- agrf/sections/metagenomics.yml | 10 +++++++++- agrf/sections/rnaseq.yml | 12 +++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 74e7826..31e75e5 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -101,4 +101,12 @@ tabs: outputs: - label: Taxonomic classification (TSV) button_md: Run GTDB-Tk - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fgtdbtk_classify_wf%2Fgtdbtk_classify_wf" \ No newline at end of file + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fgtdbtk_classify_wf%2Fgtdbtk_classify_wf" + + - id: tutorials + title: Tutorials + content: + - title_md: Metagenomics analysis tutorials + description_md: Learn how to perform metagenomic classification, assembly, and functional analysis in Galaxy. 
+ button_md: Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/metagenomics/ \ No newline at end of file diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 3110588..e579edf 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -215,4 +215,14 @@ tabs: datatypes: - tsv button_md: Run Volcano plot - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fvolcanoplot%2Fvolcanoplot" \ No newline at end of file + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fvolcanoplot%2Fvolcanoplot" + + + - id: tutorials + title: Tutorials + heading_md: + content: + - title_md: RNA-seq analysis workflow tutorial + description_md: tool + button_md: Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/transcriptomics/ \ No newline at end of file From ba69e09dbc5aa3d10d6d55e24a8a8c6daa850f66 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 10 Apr 2026 12:44:54 -0400 Subject: [PATCH 36/94] Improved RNASeq tools section with inline descriptions Added concise inline descriptions to RNASeq tools using title_md for better readability Updated multiple tools for consistency and clarity Maintained detailed explanations using description_md --- agrf/sections/rnaseq.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index e579edf..37a1035 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -19,7 +19,7 @@ tabs: - id: qc title: Quality control content: - - title_md: FastQC + - title_md: FastQC - Assess sequencing read quality description_md: | Assess raw RNASeq read quality including base quality scores, GC content and adapter contamination. 
inputs: @@ -29,7 +29,7 @@ tabs: button_md: Run FastQC button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" - - title_md: MultiQC + - title_md: MultiQC - Aggregate QC reports description_md: | Aggregate QC reports across multiple samples into a single summary report. inputs: @@ -54,7 +54,7 @@ tabs: button_md: Run Trim Galore button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbgruening%2Ftrim_galore%2Ftrim_galore" - - title_md: sortmerna + - title_md: sortmerna - Remove rRNA contamination description_md: | Remove rRNA reads from RNASeq data. inputs: @@ -70,7 +70,7 @@ tabs: - id: alignment title: Alignment content: - - title_md: STAR - Align reads to reference + - title_md: STAR - Align reads to reference genome description_md: | Align RNASeq reads to a reference genome using STAR. @@ -92,7 +92,7 @@ tabs: - id: post_alignment title: BAM processing content: - - title_md: samtools sort + - title_md: samtools sort - Sort BAM files description_md: | Sort aligned reads (BAM) by genomic coordinates. @@ -125,7 +125,7 @@ tabs: button_md: Run featureCounts button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" - - title_md: StringTie + - title_md: StringTie - Assemble and quantify transcripts description_md: | Assemble transcripts and estimate gene expression from aligned RNASeq reads. inputs: @@ -160,7 +160,7 @@ tabs: - id: analysis title: Differential expression content: - - title_md: edgeR + - title_md: edgeR - Differential expression analysis description_md: | Differential gene expression analysis. inputs: @@ -175,7 +175,7 @@ tabs: button_md: Run edgeR button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fedger%2Fedger" - - title_md: DESeq2 + - title_md: DESeq2 - Differential gene expression analysis description_md: | Identify differentially expressed genes between groups. 
inputs: @@ -193,11 +193,11 @@ tabs: - id: visualisation title: Visualisation content: - - title_md: PCA + - title_md: PCA - Visualise sample clustering description_md: | Explore sample clustering using principal component analysis. - - title_md: Heatmap + - title_md: Heatmap - Visualise gene expression patterns description_md: | Display gene expression patterns across samples. inputs: @@ -207,7 +207,7 @@ tabs: button_md: Run Heatmap button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fggplot2_heatmap2%2Fggplot2_heatmap2" - - title_md: Volcano plot + - title_md: Volcano plot - Visualise differential expression results description_md: | Visualise differential expression results (log fold change vs significance). inputs: From c9d74819702518ec7c49b7019495c2a09ce7759f Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 10 Apr 2026 12:53:47 -0400 Subject: [PATCH 37/94] Improved Metagenomics tools section with inline description Added concise inline descriptions to metagenomics tools for better readability and consistency. --- agrf/sections/metagenomics.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 31e75e5..3821e58 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -18,7 +18,7 @@ tabs: - id: assembly title: Assembly content: - - title_md: MEGAHIT + - title_md: MEGAHIT - Assemble short-reads into contigs description_md: | Assemble short reads into contigs for metagenomic analysis. inputs: @@ -30,7 +30,7 @@ tabs: button_md: Run MEGAHIT button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmegahit%2Fmegahit" - - title_md: metaSPAdes / meta-hifiasm + - title_md: metaSPAdes / meta-hifiasm - Assemble long-reads or hybrid reads description_md: | Assemble long-read or hybrid metagenomic data (e.g., PacBio). 
inputs: @@ -41,7 +41,7 @@ tabs: - id: assembly_qc title: Assembly quality control content: - - title_md: QUAST + - title_md: QUAST - Assess assembly quality description_md: | Assess the quality of assembled contigs, including length, completeness and fragmentation. inputs: @@ -56,7 +56,7 @@ tabs: - id: binning title: Binning content: - - title_md: MetaBAT2 + - title_md: MetaBAT2 - Bin contigs into genomes description_md: | Group assembled contigs into genome bins representing individual microbial genomes. inputs: @@ -74,7 +74,7 @@ tabs: - id: binning_qc title: Binning quality control content: - - title_md: CheckM2 + - title_md: CheckM2 - Assess completeness and contamination description_md: | Assess genome bin quality by estimating completeness and contamination. @@ -91,7 +91,7 @@ tabs: - id: taxonomy title: Taxonomic classification content: - - title_md: GTDB-Tk + - title_md: GTDB-Tk - Assign taxonomy to genome bins description_md: | Assign taxonomy to genome bins using the Genome Taxonomy Database. inputs: From f60f61b004dc8a47a51c12f07ce3839ebc2acc4e Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 12:21:55 -0400 Subject: [PATCH 38/94] Add GBS section with STACKS workflow, inputs/outputs, and tutorials Created a new GBS section including Overview, Tools, and Tutorials tabs. Added STACKS workflow tools with descriptions, inputs, outputs, and Galaxy tool links. Included variant analysis tutorials from the Galaxy Training Network. --- agrf/sections/gbs.yml | 110 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 agrf/sections/gbs.yml diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml new file mode 100644 index 0000000..f1dd4a0 --- /dev/null +++ b/agrf/sections/gbs.yml @@ -0,0 +1,110 @@ +id: gbs +title: GBS + +tabs: + - id: overview + title: Overview + content: + - title_md: GBS (ddRADSeq) workflow + description_md: | + Genotyping-by-sequencing (GBS/ddRADSeq) analysis using the STACKS pipeline. 
+ This workflow supports locus construction, catalog matching, variant calling, + and downstream population genetics analysis. + + - id: tools + title: Tools + content: + subsections: + - id: stacks + title: STACKS workflow + content: + + - title_md: ustacks - Build loci for each sample + description_md: | + Build loci from sequencing reads for each sample. + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastqsanger + - fasta + outputs: + - label: Sample loci + button_md: Run ustacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_ustacks%2Fstacks_ustacks" + + - title_md: cstacks - Create catalog of loci + description_md: | + Create a catalog of loci across multiple samples. + inputs: + - label: Loci from multiple samples + datatypes: + - tabular + outputs: + - label: Catalog of loci + button_md: Run cstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_cstacks%2Fstacks_cstacks" + + - title_md: sstacks - Match samples to catalog + description_md: | + Match each sample to the catalog of loci. + inputs: + - label: Sample loci + datatypes: + - tabular + - label: Catalog of loci + datatypes: + - tabular + outputs: + - label: Matches to catalog + button_md: Run sstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" + + - title_md: tsv2bam - Convert TSV to BAM + description_md: | + Convert TSV genotype data into BAM format for downstream analysis. 
+ inputs: + - label: Loci and polymorphism + datatypes: + - tabular + - label: Catalog of loci + datatypes: + - tabular + - label: Matches to catalog + datatypes: + - tabular + outputs: + - label: BAM alignments + button_md: Run tsv2bam + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_tsv2bam%2Fstacks2_tsv2bam" + + - title_md: gstacks - Assemble loci and call variants + description_md: | + Assemble loci, align reads, and perform variant calling. + inputs: + - label: BAM alignments + datatypes: + - bam + outputs: + - label: Variant calls and assembled loci + button_md: Run gstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks" + + - title_md: populations - Population genetics analysis + description_md: | + Generate population-level statistics and export results for downstream analysis. + inputs: + - label: Variant calls / loci + datatypes: + - vcf + outputs: + - label: Population statistics and export files + button_md: Run populations + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" + + - id: tutorials + title: Tutorials + content: + - title_md: GBS analysis tutorials + description_md: Explore Galaxy Training Network tutorials for variant analysis workflows relevant to GBS and population genomics studies. 
+ button_md: Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/ From d7877adf26d385dbe19cd59fa0e0cc1f2af1418f Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 12:22:39 -0400 Subject: [PATCH 39/94] updated intro.md Added GBS section into intro --- agrf/templates/intro.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index 60ed3b8..2d91132 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -82,6 +82,9 @@ then add Section, so data becomes dataSection +
+ +
From 5a37b9ab4e11ebcfc7e813460bbb2099acd4ba8a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 12:49:51 -0400 Subject: [PATCH 40/94] Added Learn Galaxy section with training resources and tutorials Implemented a new "Learn Galaxy" section in the AGRF Lab interface to guide users toward relevant training and documentation resources. The section includes curated links to Galaxy Training Network tutorials covering: - Variant analysis (including GBS workflows) - Microbiome analysis - RNA-seq workflows - Metagenomics pipelines - Population genomics approaches Also added links to official Galaxy documentation for additional support. This enhancement improves usability by helping users understand and apply Galaxy tools and workflows effectively. --- agrf/sections/learn.yml | 56 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/agrf/sections/learn.yml b/agrf/sections/learn.yml index e756b22..2fc2617 100644 --- a/agrf/sections/learn.yml +++ b/agrf/sections/learn.yml @@ -1,14 +1,58 @@ id: learn title: Learn Galaxy + tabs: - id: overview title: Overview - heading_md: + heading_md: + Learn how to use Galaxy through tutorials, workflows, and official documentation. content: - - title_md: abc + - title_md: Galaxy Training Resources + description_md: | + Learn how to use Galaxy through step-by-step tutorials from the Galaxy Training Network. + These resources cover a wide range of bioinformatics workflows including RNA-seq, + microbiome analysis, metagenomics, and variant analysis. + button_md: Browse Tutorials + button_link: https://training.galaxyproject.org/training-material/ + + - title_md: Variant Analysis Tutorials + description_md: | + Explore tutorials for SNP calling, variant analysis, and related workflows + relevant to GBS and population genomics studies. 
+ button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/ + + - title_md: Microbiome Tutorials + description_md: | + Learn microbial community analysis workflows in Galaxy, including QIIME 2 based + approaches for diversity analysis and taxonomic profiling. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/microbiome/ + + - title_md: RNA-seq Tutorials + description_md: | + Learn RNA-seq analysis workflows in Galaxy, including alignment, quantification, + and differential expression analysis. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/transcriptomics/ + + - title_md: Metagenomics Tutorials description_md: | - * abc + Explore tutorials for metagenome assembly, classification, and downstream + analysis workflows in Galaxy. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/metagenomics/ - - button_md: Upload data - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1" \ No newline at end of file + - title_md: GBS and Population Genomics Tutorials + description_md: | + Learn workflows relevant to Genotyping-by-Sequencing (GBS), including variant calling, + SNP analysis, and population genomics approaches in Galaxy. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/ + + - title_md: Galaxy Help and Documentation + description_md: | + Access official Galaxy documentation, user guides, and help resources to understand + tools, workflows, and data management in Galaxy. 
+ button_md: Open Documentation + button_link: https://galaxyproject.org/learn/ \ No newline at end of file From 2c90d2383571af0321dff8220e7319bea4b0b60c Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 13:04:45 -0400 Subject: [PATCH 41/94] Updated microbial tutorial descriptions Replaced placeholder text with a clear description of microbiome analysis workflows in Galaxy, including diversity analysis and taxonomy. --- agrf/sections/microbial.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 9989bf5..fbd93c2 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -465,7 +465,7 @@ tabs: content: - title_md: Calculating α and β diversity from microbiome taxonomic data - description_md: tool + description_md: Learn how to analyse microbiome data in Galaxy, including calculating alpha and beta diversity, exploring taxonomic composition, and visualising microbial community differences. button_md: Tutorials button_link: https://training.galaxyproject.org/training-material//topics/microbiome/tutorials/diversity/tutorial.html From 0e46bdf6306212e1b866f5471fd73909a3b9850a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 13:05:07 -0400 Subject: [PATCH 42/94] Updated RNA-seq tutorials description Replaced placeholder text with a brief description of RNA-seq workflows, including alignment and differential expression analysis. --- agrf/sections/rnaseq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 37a1035..b3dc214 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -223,6 +223,6 @@ tabs: heading_md: content: - title_md: RNA-seq analysis workflow tutorial - description_md: tool + description_md: Learn how to perform RNA-seq analysis in Galaxy, including read preprocessing, alignment, transcript assembly, and differential gene expression analysis. 
button_md: Tutorials button_link: https://training.galaxyproject.org/training-material/topics/transcriptomics/ \ No newline at end of file From 7068b5ce76e79b6961c3aff48ec235efe6f0267a Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 13:05:57 -0400 Subject: [PATCH 43/94] Improved metagenomics tutorials description Updated the description to better explain metagenomics workflows such as classification, assembly, and analysis in Galaxy. --- agrf/sections/metagenomics.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 3821e58..1e6062c 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -107,6 +107,6 @@ tabs: title: Tutorials content: - title_md: Metagenomics analysis tutorials - description_md: Learn how to perform metagenomic classification, assembly, and functional analysis in Galaxy. + description_md: Learn how to perform metagenomic analysis in Galaxy, including taxonomic classification, assembly, binning, and downstream functional analysis workflows. button_md: Tutorials button_link: https://training.galaxyproject.org/training-material/topics/metagenomics/ \ No newline at end of file From 6a8b86f71691fe4a8737e945f42c85a77553dd36 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 13:41:22 -0400 Subject: [PATCH 44/94] Add Bakta to metagenomics tools Added Bakta for functional annotation of metagenomic assemblies. 
--- agrf/sections/metagenomics.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 1e6062c..0227c30 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -102,8 +102,23 @@ tabs: - label: Taxonomic classification (TSV) button_md: Run GTDB-Tk button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fgtdbtk_classify_wf%2Fgtdbtk_classify_wf" - - - id: tutorials + + - id: functional_annotation + title: Functional annotation + content: + - title_md: Bakta - Functional annotation of genomes + description_md: | + Annotate assembled genomes and bins to identify genes and functional features. + This helps in understanding the biological roles of microbial communities. + inputs: + - label: Genome bins or contigs (FASTA) + datatypes: + - fasta + outputs: + - label: Annotated genomes and functional features + button_md: Run Bakta + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbakta%2Fbakta%2F1.9.4%2Bgalaxy1&version=latest" + - id: tutorials title: Tutorials content: - title_md: Metagenomics analysis tutorials From acc95debab13448cc1ee6578eeedbecb23ddedfc Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 15:45:09 -0400 Subject: [PATCH 45/94] Add MAFFT, BLAST, and FastTree tools to microbial section Added key QIIME2 tools to the microbial (16S) analysis section. - Added MAFFT for multiple sequence alignment - Added BLAST (feature-classifier) for taxonomic classification - Added FastTree for phylogenetic tree construction Updated button links to use correct Galaxy tool IDs and removed version-specific URLs for consistency. Ensured proper placement under the tools section and verified rendering locally. 
--- agrf/sections/microbial.yml | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index fbd93c2..aecbbb5 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -440,8 +440,6 @@ tabs: - qza button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__heatmap%2Fqiime2__feature_table__heatmap" - - - title_md: qiime2 taxa barplot - Visualise taxonomic composition description_md: | Generate interactive stacked bar plots showing the relative abundance of taxa across samples. @@ -457,7 +455,26 @@ tabs: - tsv button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__taxa__barplot%2Fqiime2__taxa__barplot" + - id: species_identification + title: Species identification and validation + content: + - title_md: BLAST - Identify sequences using database search + description_md: | + Compare sequences against reference databases to identify closest matching species. + button_md: Run BLAST + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/q2d2/qiime2__feature_classifier__classify_consensus_blast/qiime2__feature_classifier__classify_consensus_blast" + - title_md: MAFFT - Multiple sequence alignment + description_md: | + Align sequences with references for phylogenetic analysis. + button_md: Run MAFFT + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/q2d2/qiime2__alignment__mafft/qiime2__alignment__mafft" + + - title_md: FastTree - Build phylogenetic tree + description_md: | + Construct phylogenetic trees to compare samples with known species. 
+ button_md: Run FastTree + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/iuc/fasttree/fasttree" - id: tutorials title: Tutorials From c3d7f8a9bec3fa9b3710e7ee41c73acbaa01a863 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 17 Apr 2026 16:19:42 -0400 Subject: [PATCH 46/94] updated base.yml added gbs into base.yml --- agrf/static/local/base.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index 1bf6fe1..5f2673f 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -28,6 +28,7 @@ sections: - sections/microbial.yml - sections/rnaseq.yml - sections/metagenomics.yml + - sections/gbs.yml # - sections/moreanalysis.yml - sections/learn.yml # - sections/help.yml From 4d3cd2ea0f00c7af0df570f67ef7c5695c44f2c1 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 24 Apr 2026 10:30:08 -0400 Subject: [PATCH 47/94] Add RNASeq overview, outputs, and file descriptions Added complete RNASeq overview section including workflow description, results, and file outputs. Structured content to match microbial section with clear categories for raw data, QC, alignment, quantification, differential expression, and visualisation. Improved clarity and consistency of file descriptions and formats. --- agrf/sections/rnaseq.yml | 73 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index b3dc214..4b3aea6 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -3,13 +3,74 @@ title: RNASeq tabs: - id: overview title: Overview - heading_md: content: - - title_md: RNASeq workflow + - title_md: About the service description_md: | - RNA sequencing (RNASeq) measures gene expression levels across samples. - The workflow includes alignment, filtering, normalization, - differential expression analysis and visualization. 
+ RNA sequencing (RNASeq) is used to quantify gene expression and identify differentially expressed genes across biological conditions. In this workflow, raw sequencing reads are quality checked, filtered, and aligned to a reference genome using STAR. Gene-level quantification is performed using featureCounts or StringTie, followed by statistical analysis using edgeR or DESeq2. + + This pipeline enables identification of transcriptional changes, biological pathway alterations, and potential biomarkers associated with experimental conditions. + + - title_md: Results include + description_md: | + RNASeq analysis generates multiple output files at different stages of the workflow. + + **Processed read data** + - Quality filtered FASTQ files (optional after trimming) + + **Alignment outputs** + - BAM files containing aligned reads + - Sorted and indexed BAM files + + **Quantification outputs** + - Gene-level count matrix (tabular format) + - Transcript abundance estimates (StringTie output) + + **Differential expression results** + - Tables of differentially expressed genes (log fold change, p-values, adjusted p-values) + + **Visualisation outputs** + - PCA plots (sample clustering) + - Heatmaps (expression patterns) + - Volcano plots (significance vs fold change) + + - title_md: What files are included? 
+ description_md: | + **Raw Data** + - Demultiplexed FASTQ files (per sample) containing sequencing reads + + **Quality Control** + - FastQC reports (HTML) providing per-sample quality metrics + - MultiQC summary report aggregating quality results across all samples + + **Alignment** + - BAM files containing reads mapped to the reference genome + + **Quantification** + - Gene count matrix (.tsv or .txt) containing gene-level expression values + - Transcript abundance files (StringTie output) + + **Differential Expression** + - DEG results table (.tsv) including log fold change, p-values, and adjusted p-values + + **Visualisation** + - PCA plots, heatmaps, and volcano plots for downstream interpretation + + - title_md: File formats used + description_md: | + **.fastq** + - Raw sequencing reads + + **.bam** + - Binary alignment file storing mapped reads + + **.tsv / .txt** + - Tabular files containing gene counts and statistical results + + **.html** + - Quality control reports (FastQC, MultiQC) + + **.csv** + - Processed results and downstream analysis tables (e.g., DEG results) - id: tools title: Tools @@ -26,6 +87,8 @@ tabs: - label: Sequencing reads (FASTQ) datatypes: - fastq + outputs: + - label: FastQC report (HTML) button_md: Run FastQC button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" From d2370386ca6b71a714a1734191ab57d0d061b17f Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 24 Apr 2026 10:54:27 -0400 Subject: [PATCH 48/94] Add Metagenomics overview and structured output tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added the Metagenomics section to the AGRF Lab page, including: - Overview of metagenomics workflow (assembly, binning, annotation) - Structured “Results include” section - Clean “What files are included” table for output files - File formats used for downstream analysis Improved formatting for consistency with existing microbial section. 
--- agrf/sections/metagenomics.yml | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 0227c30..ceb2a04 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -5,10 +5,69 @@ tabs: - id: overview title: Overview content: - - title_md: Metagenomics workflow + - title_md: About the service description_md: | - Metagenomics analysis involves assembly, binning, quality assessment, and taxonomic classification - of microbial communities. + Metagenomics analysis enables the study of microbial communities directly from environmental or host-associated samples without the need for culturing. + This workflow processes sequencing reads through assembly, quality assessment, genome binning, and taxonomic and functional annotation. + + Short-read assemblies are generated using MEGAHIT, while long-read assemblies are performed using meta-hifiasm. + Assembly quality is evaluated using QUAST, and genome binning is carried out using MetaBAT to reconstruct individual microbial genomes. + + Bin quality is assessed using CheckM2, which estimates genome completeness and contamination. + Taxonomic classification is performed using GTDB-Tk, and functional annotation is conducted using Bakta to identify genes and biological functions. + + This pipeline provides insights into microbial diversity, genome composition, and functional potential of complex microbial communities. + + - title_md: Results include + description_md: | + Metagenomics analysis generates multiple output files across different stages of the workflow. 
+ + **Processed read data** + - Quality filtered sequencing reads (FASTQ files) + + **Assembly outputs** + - Contigs or assembled genomes representing microbial sequences + + **Assembly quality assessment** + - QUAST reports summarising assembly statistics (e.g., N50, contig length, total assembly size) + + **Genome binning** + - Genome bins representing reconstructed microbial genomes + + **Binning quality assessment** + - CheckM2 reports indicating completeness and contamination levels of genome bins + + **Taxonomic classification** + - GTDB-Tk results assigning taxonomy to genome bins + + **Functional annotation** + - Bakta outputs including gene predictions and functional annotations + + **Summary outputs** + - Tables and reports for downstream analysis and interpretation + + - title_md: What files are included? + description_md: | + | **Filename** | **Description** | + |-------------|----------------| + | Demultiplexed *.FASTQ files (per sample) | Raw sequencing reads for each sample | + | contigs.fasta | Assembled contigs generated from sequencing reads | + | quast_report.html | Assembly quality report including N50, contig length, and summary statistics | + | bins/*.fa | Genome bins representing reconstructed microbial genomes | + | checkm2_results.tsv | Genome bin quality metrics (completeness and contamination) | + | gtdbtk_classification.tsv | Taxonomic classification of genome bins using GTDB-Tk | + | bakta_annotations.tsv | Functional annotations including gene predictions and protein functions | + | summary_tables.tsv | Summary tables for downstream analysis and interpretation | + + - title_md: File formats used + description_md: | + | **Type** | **Description** | + |---------|----------------| + | .fastq | Raw sequencing reads | + | .fasta / .fa | Assembled contigs or genome bins | + | .tsv | Tabular files containing QC metrics, taxonomy, and annotations | + | .html | Quality reports (e.g., QUAST) | + | .gff | Gene annotations and genomic features | - 
id: tools title: Tools From 2d05417e13f4cabebc0829fb661969cae27979e9 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 24 Apr 2026 11:20:29 -0400 Subject: [PATCH 49/94] Added GBS (ddRADSeq) overview section with workflow description and outputs Implemented GBS overview section including service description, expected results, file outputs, and file format definitions. Structured content to align with microbial and metagenomics sections for consistency. --- agrf/sections/gbs.yml | 48 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index f1dd4a0..36fbe8e 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -5,11 +5,51 @@ tabs: - id: overview title: Overview content: - - title_md: GBS (ddRADSeq) workflow + - title_md: About the service description_md: | - Genotyping-by-sequencing (GBS/ddRADSeq) analysis using the STACKS pipeline. - This workflow supports locus construction, catalog matching, variant calling, - and downstream population genetics analysis. + Genotyping-by-sequencing (GBS/ddRADSeq) is used to identify genetic variants across multiple samples for population genetics and related studies. + + AGRF currently runs GBS analysis using NGSEP because it is efficient for larger sample sets. However, NGSEP is not currently available in Galaxy. This Galaxy section therefore provides STACKS tools as an alternative option for users who want to reanalyse raw FASTQ data in Galaxy. + + Users may also import existing VCF outputs into Galaxy for downstream filtering and analysis using tools such as VCFtools and PLINK. 
+ + - title_md: Results include + description_md: | + **Raw read data** - Demultiplexed sequencing reads in FASTQ format + + **Variant calls** - VCF files containing SNP and genotype information + + **Consensus sequences** - FASTA files containing consensus sequences + + **Filtered variant outputs** - VCF files filtered by missingness, minor allele frequency, or other criteria + + **PLINK outputs** - Files prepared for downstream population genetic analysis + + **Distance and visualisation outputs** - IBS distance matrix and heatmap-style outputs for sample comparison + + - title_md: What files are included? + description_md: | + | **Filename** | **Description** | + |-------------|----------------| + | Demultiplexed *.FASTQ files | Raw sequencing reads for each sample | + | variants.vcf | Variant calls containing SNP and genotype information | + | consensus_sequences.fasta | Consensus sequences generated from variant/locus analysis | + | filtered_high_level.vcf | VCF filtered using stricter missingness and MAF thresholds | + | filtered_low_level.vcf | VCF filtered using less stringent missingness and MAF thresholds | + | populations.plink.* | PLINK-format files generated from VCF data | + | ibs_distance_matrix.tsv | Identity-by-state distance matrix for sample comparison | + | heatmap_output.html / .png | Visualisation output showing sample relatedness or clustering | + + - title_md: File formats used + description_md: | + | **Type** | **Description** | + |---------|----------------| + | .fastq | Raw sequencing reads | + | .vcf | Variant call format containing SNP and genotype data | + | .fasta / .fa | Consensus sequences or assembled loci | + | .ped / .map | PLINK genotype input files | + | .tsv / .txt | Tabular outputs, filtering summaries, or distance matrices | + | .html / .png | Visualisation outputs such as heatmaps | - id: tools title: Tools From 3ec900a49a1adcae0cff9ab500fd4642894981d2 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 24 Apr 2026 11:28:16 
-0400 Subject: [PATCH 50/94] Refined RNA-seq overview formatting and consistency Improved RNA-seq section by standardising terminology, refining file naming conventions, and enhancing clarity of output descriptions to maintain consistency across all AGRF lab sections. --- agrf/sections/rnaseq.yml | 77 ++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 50 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 4b3aea6..9af839e 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -6,72 +6,49 @@ tabs: content: - title_md: About the service description_md: | - RNA sequencing (RNASeq) is used to quantify gene expression and identify differentially expressed genes across biological conditions. In this workflow, raw sequencing reads are quality checked, filtered, and aligned to a reference genome using STAR. Gene-level quantification is performed using featureCounts or StringTie, followed by statistical analysis using edgeR or DESeq2. + RNA sequencing (RNASeq) is used to quantify gene expression and identify differentially expressed genes across biological conditions. + + In this workflow, raw sequencing reads are quality checked, trimmed if required, and aligned to a reference genome using STAR. Gene-level quantification is performed using featureCounts or StringTie, followed by statistical analysis using edgeR or DESeq2. This pipeline enables identification of transcriptional changes, biological pathway alterations, and potential biomarkers associated with experimental conditions. - title_md: Results include description_md: | - RNASeq analysis generates multiple output files at different stages of the workflow. 
+ **Raw read data** - Demultiplexed sequencing reads in FASTQ format - **Processed read data** - - Quality filtered FASTQ files (optional after trimming) + **Quality control outputs** - FastQC reports and MultiQC summary - **Alignment outputs** - - BAM files containing aligned reads - - Sorted and indexed BAM files + **Alignment outputs** - BAM files containing mapped reads - **Quantification outputs** - - Gene-level count matrix (tabular format) - - Transcript abundance estimates (StringTie output) + **Quantification outputs** - Gene count matrices and transcript abundance estimates - **Differential expression results** - - Tables of differentially expressed genes (log fold change, p-values, adjusted p-values) + **Differential expression results** - Tables of differentially expressed genes (log fold change, p-values, adjusted p-values) - **Visualisation outputs** - - PCA plots (sample clustering) - - Heatmaps (expression patterns) - - Volcano plots (significance vs fold change) + **Visualisation outputs** - PCA plots, heatmaps, and volcano plots - title_md: What files are included? 
description_md: | - **Raw Data** - - Demultiplexed FASTQ files (per sample) containing sequencing reads - - **Quality Control** - - FastQC reports (HTML) providing per-sample quality metrics - - MultiQC summary report aggregating quality results across all samples - - **Alignment** - - BAM files containing reads mapped to the reference genome - - **Quantification** - - Gene count matrix (.tsv or .txt) containing gene-level expression values - - Transcript abundance files (StringTie output) - - **Differential Expression** - - DEG results table (.tsv) including log fold change, p-values, and adjusted p-values - - **Visualisation** - - PCA plots, heatmaps, and volcano plots for downstream interpretation + | **Filename** | **Description** | + |-------------|----------------| + | Demultiplexed *.FASTQ files | Raw sequencing reads for each sample | + | fastqc_report.html / multiqc_report.html | Quality control reports | + | aligned_reads.bam | Reads aligned to the reference genome | + | gene_counts.tsv | Gene-level count matrix | + | transcript_abundance.tsv | Transcript abundance estimates (StringTie output) | + | deg_results.tsv | Differential expression results (logFC, p-values, adjusted p-values) | + | pca_plot.png | Sample clustering visualisation | + | heatmap.png | Expression pattern visualisation | + | volcano_plot.png | Significance vs fold-change visualisation | - title_md: File formats used description_md: | - **.fastq** - - Raw sequencing reads - - **.bam** - - Binary alignment file storing mapped reads - - **.tsv / .txt** - - Tabular files containing gene counts and statistical results - - **.html** - - Quality control reports (FastQC, MultiQC) - - **.csv** - - Processed results and downstream analysis tables (e.g., DEG results) - + | **Type** | **Description** | + |---------|----------------| + | .fastq | Raw sequencing reads | + | .bam | Binary alignment files storing mapped reads | + | .tsv / .txt | Tabular files containing counts and statistical results 
| + | .html | Quality control reports (FastQC, MultiQC) | + | .png | Visualisation outputs such as PCA, heatmaps, and volcano plots | - id: tools title: Tools content: From d7823970b5f769846f4a431f7d9bb518bbce43bd Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 24 Apr 2026 12:25:04 -0400 Subject: [PATCH 51/94] Update GBS section with tools, outputs, and bcftools filtering Added GBS workflow tools (Stacks pipeline) and integrated bcftools filtering for VCF reanalysis. Updated outputs and file format sections to reflect NGSEP-based results and downstream analysis steps. --- agrf/sections/gbs.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 36fbe8e..7cc8177 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -7,12 +7,11 @@ tabs: content: - title_md: About the service description_md: | - Genotyping-by-sequencing (GBS/ddRADSeq) is used to identify genetic variants across multiple samples for population genetics and related studies. + Genotyping-by-sequencing (GBS/ddRADSeq) is used to identify genetic variants across multiple samples for population genetics studies. - AGRF currently runs GBS analysis using NGSEP because it is efficient for larger sample sets. However, NGSEP is not currently available in Galaxy. This Galaxy section therefore provides STACKS tools as an alternative option for users who want to reanalyse raw FASTQ data in Galaxy. - - Users may also import existing VCF outputs into Galaxy for downstream filtering and analysis using tools such as VCFtools and PLINK. + AGRF performs primary GBS analysis using NGSEP due to its efficiency for large sample sets. The outputs typically include variant call files (VCF) and consensus sequences (FASTA). + This Galaxy section supports downstream analysis of these outputs. 
Users can import VCF files into Galaxy and perform variant filtering, population genetic analysis, and visualisation using tools such as VCFtools and PLINK. - title_md: Results include description_md: | **Raw read data** - Demultiplexed sequencing reads in FASTQ format @@ -66,7 +65,6 @@ tabs: - label: Sequencing reads (FASTQ) datatypes: - fastqsanger - - fasta outputs: - label: Sample loci button_md: Run ustacks @@ -141,6 +139,11 @@ tabs: button_md: Run populations button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" + - title_md: bcftools filter - Filter variant data + description_md: Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, and quality metrics for downstream population analysis. + button_md: Launch Tool + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbcftools_filter%2Fbcftools_filter" + - id: tutorials title: Tutorials content: From 7af23e917ab7d564fc29549399e4fc4e8d4c8309 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 13:45:01 -0400 Subject: [PATCH 52/94] Add RNA-seq overview section - Added overview section - Included outputs and file formats from report --- agrf/sections/rnaseq.yml | 76 +++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 9af839e..89f4539 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -1,54 +1,56 @@ id: rnaseq -title: RNASeq +title: RNA-seq + tabs: - id: overview title: Overview content: - - title_md: About the service - description_md: | - RNA sequencing (RNASeq) is used to quantify gene expression and identify differentially expressed genes across biological conditions. - In this workflow, raw sequencing reads are quality checked, trimmed if required, and aligned to a reference genome using STAR. 
Gene-level quantification is performed using featureCounts or StringTie, followed by statistical analysis using edgeR or DESeq2. + - title_md: About the service + description_md: | + Illumina RNA-seq is used to quantify gene expression by sequencing transcriptomic RNA from biological samples. + + Sequencing is performed using paired-end reads (150 bp) on an Illumina NovaSeq platform. Raw sequencing data undergoes primary processing including demultiplexing and quality control to generate FASTQ files. - This pipeline enables identification of transcriptional changes, biological pathway alterations, and potential biomarkers associated with experimental conditions. + The data is processed through a standard RNA-seq analysis workflow, which includes trimming, alignment to a reference genome, transcript assembly, and gene-level quantification. Reads are aligned using the STAR aligner, and gene counts are summarised using featureCounts. - - title_md: Results include - description_md: | - **Raw read data** - Demultiplexed sequencing reads in FASTQ format + Optional downstream analysis includes differential gene expression using edgeR, enabling identification of significantly differentially expressed genes between experimental conditions. - **Quality control outputs** - FastQC reports and MultiQC summary + The final outputs provide gene expression measurements, alignment data, transcript structures, and statistical analysis results. 
- **Alignment outputs** - BAM files containing mapped reads + - title_md: Sequencing Output + description_md: | + **Read length:** 150 bp paired-end reads - **Quantification outputs** - Gene count matrices and transcript abundance estimates + **Number of reads:** Varies per sample (typically millions of reads per sample) - **Differential expression results** - Tables of differentially expressed genes (log fold change, p-values, adjusted p-values) + Raw sequencing reads are provided as FASTQ files containing sequence and quality score information. - **Visualisation outputs** - PCA plots, heatmaps, and volcano plots + - title_md: What files are included? + description_md: | - - title_md: What files are included? - description_md: | - | **Filename** | **Description** | - |-------------|----------------| - | Demultiplexed *.FASTQ files | Raw sequencing reads for each sample | - | fastqc_report.html / multiqc_report.html | Quality control reports | - | aligned_reads.bam | Reads aligned to the reference genome | - | gene_counts.tsv | Gene-level count matrix | - | transcript_abundance.tsv | Transcript abundance estimates (StringTie output) | - | deg_results.tsv | Differential expression results (logFC, p-values, adjusted p-values) | - | pca_plot.png | Sample clustering visualisation | - | heatmap.png | Expression pattern visualisation | - | volcano_plot.png | Significance vs fold-change visualisation | + | Filename | Description | + |----------|------------| + | Demultiplexed *.FASTQ files | Raw sequencing reads for each sample | + | *.bam | Aligned reads mapped to the reference genome | + | *.bam.bai | BAM index files for visualisation | + | features_summary.csv | Summary of mapping statistics with genome and exons | + | gene_counts.txt | Gene-level read count matrix | + | *.gtf | Transcript assembly (gene and isoform structure) | + | MDSplot.pdf | Sample clustering visualisation | + | Differential expression analysis results | MDS plots, box plots, library size plots, 
DE tables, smear plots, and heatmaps from edgeR analysis | + - title_md: File formats used + description_md: | - - title_md: File formats used - description_md: | - | **Type** | **Description** | - |---------|----------------| - | .fastq | Raw sequencing reads | - | .bam | Binary alignment files storing mapped reads | - | .tsv / .txt | Tabular files containing counts and statistical results | - | .html | Quality control reports (FastQC, MultiQC) | - | .png | Visualisation outputs such as PCA, heatmaps, and volcano plots | + | Type | Description | + |------|------------| + | .fastq / .fastq.gz | Raw sequencing reads | + | .bam | Binary alignment files storing mapped reads | + | .bai | Index files for BAM visualisation | + | .gtf | Transcript annotation and assembly | + | .txt / .csv | Gene counts and summary statistics | + | .pdf | Visualisation outputs | + - id: tools title: Tools content: @@ -162,7 +164,7 @@ tabs: - gff outputs: - label: Gene count matrix (TSV) - button_md: Run featureCounts + button_md: Run featureCounts button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" - title_md: StringTie - Assemble and quantify transcripts From 0bec3c3b85c76d69b03b9c4cfcdff8627b266dff Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 13:46:52 -0400 Subject: [PATCH 53/94] Add metagenomics overview section - Added overview section - Included taxonomic and functional outputs - Fixed formatting issues --- agrf/sections/metagenomics.yml | 110 ++++++++++++++------------------- 1 file changed, 47 insertions(+), 63 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index ceb2a04..01a114b 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -5,69 +5,53 @@ tabs: - id: overview title: Overview content: - - title_md: About the service - description_md: | - Metagenomics analysis enables the study of microbial communities directly from 
environmental or host-associated samples without the need for culturing. - This workflow processes sequencing reads through assembly, quality assessment, genome binning, and taxonomic and functional annotation. - - Short-read assemblies are generated using MEGAHIT, while long-read assemblies are performed using meta-hifiasm. - Assembly quality is evaluated using QUAST, and genome binning is carried out using MetaBAT to reconstruct individual microbial genomes. - - Bin quality is assessed using CheckM2, which estimates genome completeness and contamination. - Taxonomic classification is performed using GTDB-Tk, and functional annotation is conducted using Bakta to identify genes and biological functions. - - This pipeline provides insights into microbial diversity, genome composition, and functional potential of complex microbial communities. - - - title_md: Results include - description_md: | - Metagenomics analysis generates multiple output files across different stages of the workflow. - - **Processed read data** - - Quality filtered sequencing reads (FASTQ files) - - **Assembly outputs** - - Contigs or assembled genomes representing microbial sequences - - **Assembly quality assessment** - - QUAST reports summarising assembly statistics (e.g., N50, contig length, total assembly size) - - **Genome binning** - - Genome bins representing reconstructed microbial genomes - - **Binning quality assessment** - - CheckM2 reports indicating completeness and contamination levels of genome bins - - **Taxonomic classification** - - GTDB-Tk results assigning taxonomy to genome bins - - **Functional annotation** - - Bakta outputs including gene predictions and functional annotations - - **Summary outputs** - - Tables and reports for downstream analysis and interpretation - - - title_md: What files are included? 
- description_md: | - | **Filename** | **Description** | - |-------------|----------------| - | Demultiplexed *.FASTQ files (per sample) | Raw sequencing reads for each sample | - | contigs.fasta | Assembled contigs generated from sequencing reads | - | quast_report.html | Assembly quality report including N50, contig length, and summary statistics | - | bins/*.fa | Genome bins representing reconstructed microbial genomes | - | checkm2_results.tsv | Genome bin quality metrics (completeness and contamination) | - | gtdbtk_classification.tsv | Taxonomic classification of genome bins using GTDB-Tk | - | bakta_annotations.tsv | Functional annotations including gene predictions and protein functions | - | summary_tables.tsv | Summary tables for downstream analysis and interpretation | - - - title_md: File formats used - description_md: | - | **Type** | **Description** | - |---------|----------------| - | .fastq | Raw sequencing reads | - | .fasta / .fa | Assembled contigs or genome bins | - | .tsv | Tabular files containing QC metrics, taxonomy, and annotations | - | .html | Quality reports (e.g., QUAST) | - | .gff | Gene annotations and genomic features | + + - title_md: About the service + description_md: | + Illumina metagenomic whole genome sequencing (MetaWGS) is used to characterise microbial communities by sequencing DNA directly from environmental or biological samples. + + Sequencing is performed using paired-end reads (150 bp) on an Illumina NovaSeq platform. Raw sequencing data undergoes quality control and filtering to remove adapter sequences and low-quality bases. + + The processed reads are analysed to determine the taxonomic composition and functional potential of the microbial community. Taxonomic profiling is performed using Kraken2 and Bracken, enabling classification of reads across different taxonomic levels. Functional profiling is performed using HUMAnN2, providing insights into gene families and metabolic pathways present in the samples. 
+ + The final outputs include read count summaries, taxonomic classifications, functional profiling results, and interactive visualisations, allowing comprehensive analysis of microbial community structure and function. + + - title_md: Sequencing Output + description_md: | + **Read length:** 150 bp paired-end reads + + **Number of reads:** Varies per sample + + Raw sequencing reads are provided as FASTQ files containing sequence and quality score information. + + - title_md: What files are included? + description_md: | + + | Filename | Description | + |----------|------------| + | readCountsTable.tsv | Read counts for each sample across processing steps (raw, trimmed, classified) | + | taxonomic_profiling/{sample_name}/{sample_name}.kraken.report | Taxonomic classification results from Kraken2 | + | taxonomic_profiling/{sample_name}/{sample_name}.bracken.tsv | Refined taxonomic abundance estimates from Bracken | + | taxonomic_profiling/{sample_name}/{sample_name}.html | Interactive Krona plot for taxonomic composition | + | taxonomic_profiling/absolute_abundance.xlsx | Absolute abundance of taxa across samples | + | taxonomic_profiling/relative_abundance.xlsx | Relative abundance of taxa across samples | + | taxonomic_profiling/barplot/index.html | Interactive barplot visualisation of taxonomic composition | + | functional_profiling/{sample_name}_genefamilies.tsv | Gene family abundance per sample | + | functional_profiling/{sample_name}_pathabundance.tsv | Pathway abundance per sample | + | functional_profiling/{sample_name}_pathcoverage.tsv | Pathway coverage per sample | + | functional_profiling/humann2_genefamilies.tsv | Gene family count matrix across samples | + | functional_profiling/humann2_pathabundance.tsv | Pathway abundance matrix across samples | + | functional_profiling/humann2_pathcoverage.tsv | Pathway coverage matrix across samples | + + - title_md: File formats used + description_md: | + + | Type | Description | + |------|------------| + | .fastq 
/ .fastq.gz | Raw sequencing reads | + | .tsv | Tab-separated files for read counts, taxonomy, and functional profiles | + | .xlsx | Excel files for abundance tables | + | .html | Interactive visualisations (Krona plots, barplots) | - id: tools title: Tools From f66a79852cc178808adc1f05cbbdd1d9e89e4983 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 13:47:28 -0400 Subject: [PATCH 54/94] Add GBS overview section - Added overview section - Included FASTQ, VCF, consensus, QC outputs --- agrf/sections/gbs.yml | 53 +++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 7cc8177..64d6a22 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -7,48 +7,41 @@ tabs: content: - title_md: About the service description_md: | - Genotyping-by-sequencing (GBS/ddRADSeq) is used to identify genetic variants across multiple samples for population genetics studies. + Genotyping-by-sequencing (GBS) is used to identify genetic variation across multiple samples by sequencing reduced representation regions of the genome generated through restriction enzyme digestion. - AGRF performs primary GBS analysis using NGSEP due to its efficiency for large sample sets. The outputs typically include variant call files (VCF) and consensus sequences (FASTA). + Library preparation is performed using a ddRAD-based protocol, which includes digestion of genomic DNA with restriction enzymes, ligation of barcoded adapters, size selection of fragments, and PCR amplification to generate sequencing-ready libraries. - This Galaxy section supports downstream analysis of these outputs. Users can import VCF files into Galaxy and perform variant filtering, population genetic analysis, and visualisation using tools such as VCFtools and PLINK. 
- - title_md: Results include - description_md: | - **Raw read data** - Demultiplexed sequencing reads in FASTQ format - - **Variant calls** - VCF files containing SNP and genotype information + Sequencing data is processed using the DeNovoGBS pipeline from NGSEP software. Raw sequencing reads are demultiplexed into individual samples based on barcode sequences. Reads are clustered based on sequence similarity, and variants are detected within each cluster using a probabilistic model. - **Consensus sequences** - FASTA files containing consensus sequences + Quality control metrics are generated throughout the analysis, including read counts per sample, clustering statistics, and similarity between samples. - **Filtered variant outputs** - VCF files filtered by missingness, minor allele frequency, or other criteria + The final outputs include variant calls, consensus sequences for each cluster, demultiplexed sequencing reads, and quality control visualisations, enabling downstream population genetics and diversity analyses. - **PLINK outputs** - Files prepared for downstream population genetic analysis + - title_md: Sequencing Output + description_md: | + **Number of samples:** 95 samples following demultiplexing - **Distance and visualisation outputs** - IBS distance matrix and heatmap-style outputs for sample comparison + Raw sequencing reads are provided as compressed FASTQ files for each individual sample after barcode-based demultiplexing. - title_md: What files are included? 
description_md: | - | **Filename** | **Description** | - |-------------|----------------| - | Demultiplexed *.FASTQ files | Raw sequencing reads for each sample | - | variants.vcf | Variant calls containing SNP and genotype information | - | consensus_sequences.fasta | Consensus sequences generated from variant/locus analysis | - | filtered_high_level.vcf | VCF filtered using stricter missingness and MAF thresholds | - | filtered_low_level.vcf | VCF filtered using less stringent missingness and MAF thresholds | - | populations.plink.* | PLINK-format files generated from VCF data | - | ibs_distance_matrix.tsv | Identity-by-state distance matrix for sample comparison | - | heatmap_output.html / .png | Visualisation output showing sample relatedness or clustering | + + | Filename | Description | + |----------|------------| + | *.fastq.gz | Demultiplexed sequencing reads for each sample | + | *_variants.vcf | Variant calls across all samples | + | *.fa | Consensus sequences for each read cluster | + | QC figures (.jpg) | Quality control visualisations (read counts, clustering, IBS heatmap) | - title_md: File formats used description_md: | - | **Type** | **Description** | - |---------|----------------| - | .fastq | Raw sequencing reads | - | .vcf | Variant call format containing SNP and genotype data | - | .fasta / .fa | Consensus sequences or assembled loci | - | .ped / .map | PLINK genotype input files | - | .tsv / .txt | Tabular outputs, filtering summaries, or distance matrices | - | .html / .png | Visualisation outputs such as heatmaps | + + | Type | Description | + |------|------------| + | .fastq.gz | Demultiplexed sequencing reads per sample | + | .vcf | Variant call format for genotype data | + | .fa | FASTA format for consensus sequences | + | .jpg | Quality control figures | - id: tools title: Tools From ab1c1d26c1fd10393820d3b62a52092ae5609da5 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 16:10:19 -0400 Subject: [PATCH 55/94] Clarify GBS tool 
input/output descriptions Updated GBS tool inputs and outputs with specific file descriptions and sources (ustacks, cstacks, sstacks). Let me know if anything else is needed. --- agrf/sections/gbs.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 64d6a22..044b3bc 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -67,7 +67,7 @@ tabs: description_md: | Create a catalog of loci across multiple samples. inputs: - - label: Loci from multiple samples + - label: Sample loci files generated by ustacks (combined across samples) datatypes: - tabular outputs: @@ -79,14 +79,14 @@ tabs: description_md: | Match each sample to the catalog of loci. inputs: - - label: Sample loci + - label: Sample loci files generated by ustacks (per sample) datatypes: - tabular - - label: Catalog of loci + - label: Catalog of loci file generated by cstacks datatypes: - tabular outputs: - - label: Matches to catalog + - label: Matches-to-catalog file generated by sstacks button_md: Run sstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" @@ -94,17 +94,17 @@ tabs: description_md: | Convert TSV genotype data into BAM format for downstream analysis. 
inputs: - - label: Loci and polymorphism + - label: Loci and polymorphism file from STACKS workflow datatypes: - tabular - - label: Catalog of loci + - label: Catalog of loci file generated by cstacks datatypes: - tabular - - label: Matches to catalog + - label: Matches-to-catalog file generated by sstacks datatypes: - tabular outputs: - - label: BAM alignments + - label: BAM alignment files generated by tsv2bam button_md: Run tsv2bam button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_tsv2bam%2Fstacks2_tsv2bam" @@ -116,7 +116,7 @@ tabs: datatypes: - bam outputs: - - label: Variant calls and assembled loci + - label: Variant calls and assembled loci generated by gstacks button_md: Run gstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks" From dd8310c66e376642ce42a1e55723090bc85a4699 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 16:20:26 -0400 Subject: [PATCH 56/94] Fix base.yml tracking and remove old_base.yml --- agrf/old_base.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 agrf/old_base.yml diff --git a/agrf/old_base.yml b/agrf/old_base.yml deleted file mode 100644 index e69de29..0000000 From af6585f18cdf15f7ca03f75f8de4cf709a613d8b Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 16:52:55 -0400 Subject: [PATCH 57/94] Add base.yml and update gitignore Added agrf/base.yml and removed it from gitignore so it is tracked in the repo. 
--- .gitignore | 1 - agrf/base.yml | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 agrf/base.yml diff --git a/.gitignore b/.gitignore index 921816b..31effd3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ python-3.14.3-amd64 (1).exe venv venv311 .DS_Stores -agrf/base.yml \ No newline at end of file diff --git a/agrf/base.yml b/agrf/base.yml new file mode 100644 index 0000000..5f2673f --- /dev/null +++ b/agrf/base.yml @@ -0,0 +1,36 @@ +# Default spec for a Lab page - this is a test +# Any variables defined here can be used in Md/HTML templates, like: +# "# Welcome to the Agrf" + +site_name: "Australia" +lab_name: AGRF Lab #this will be in caps, at right of logo +#or: use the word lab, in same font as agrf logo +galaxy_base_url: https://usegalaxy.org.au +subdomain: agrf +root_domain: usegalaxy.org.au + +# The files below must be accessible on the internet, relative to this file +# ----------------------------------------------------------------------------- +# Custom content relative to this file URL +header_logo: static/agrf-lab.png #logo at the left hand side +custom_css: static/custom.css +intro_md: templates/intro.md #intro text +footer_md: templates/footer.md +conclusion_md: templates/conclusion.md + +# Data to be rendered into sections/tabs/accordion elements: + +#the sections are the yml files + +sections: + - sections/data.yml + - sections/qualitycontrol.yml + - sections/microbial.yml + - sections/rnaseq.yml + - sections/metagenomics.yml + - sections/gbs.yml +# - sections/moreanalysis.yml + - sections/learn.yml +# - sections/help.yml +# - sections/agrf.yml + From 652a6d004df2fc2479f510d18c9e3473edf8a648 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 21:08:25 -0400 Subject: [PATCH 58/94] Clarify GBS tool input and output descriptions Updated the GBS tool inputs and outputs with clearer file descriptions and source steps. Could you please review when you have time? 
--- agrf/sections/gbs.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 044b3bc..7334681 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -59,7 +59,7 @@ tabs: datatypes: - fastqsanger outputs: - - label: Sample loci + - label: Sample loci TSV files generated by ustacks button_md: Run ustacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_ustacks%2Fstacks_ustacks" @@ -67,11 +67,11 @@ tabs: description_md: | Create a catalog of loci across multiple samples. inputs: - - label: Sample loci files generated by ustacks (combined across samples) + - label: Sample loci TSV files generated by ustacks (combined across samples) datatypes: - tabular outputs: - - label: Catalog of loci + - label: Catalog of loci TSV file generated by cstacks button_md: Run cstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_cstacks%2Fstacks_cstacks" @@ -79,14 +79,14 @@ tabs: description_md: | Match each sample to the catalog of loci. inputs: - - label: Sample loci files generated by ustacks (per sample) + - label: Sample loci TSV files generated by ustacks (per sample) datatypes: - tabular - label: Catalog of loci file generated by cstacks datatypes: - tabular outputs: - - label: Matches-to-catalog file generated by sstacks + - label: Matches-to-catalog TSV file generated by sstacks button_md: Run sstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" @@ -94,7 +94,7 @@ tabs: description_md: | Convert TSV genotype data into BAM format for downstream analysis. 
inputs: - - label: Loci and polymorphism file from STACKS workflow + - label: Loci and polymorphism TSV file generated by gstacks (STACKS workflow) datatypes: - tabular - label: Catalog of loci file generated by cstacks From 534998b1c3d9441cd257981ffeb665868996d4a6 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 1 May 2026 21:18:46 -0400 Subject: [PATCH 59/94] update gbs.yml Few modification in stacks section --- agrf/sections/gbs.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 7334681..6174a29 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -55,7 +55,7 @@ tabs: description_md: | Build loci from sequencing reads for each sample. inputs: - - label: Sequencing reads (FASTQ) + - label: Demultiplexed FASTQ reads for each sample datatypes: - fastqsanger outputs: @@ -82,7 +82,7 @@ tabs: - label: Sample loci TSV files generated by ustacks (per sample) datatypes: - tabular - - label: Catalog of loci file generated by cstacks + - label: Catalog of loci TSV file generated by cstacks datatypes: - tabular outputs: @@ -94,13 +94,13 @@ tabs: description_md: | Convert TSV genotype data into BAM format for downstream analysis. inputs: - - label: Loci and polymorphism TSV file generated by gstacks (STACKS workflow) + - label: Loci and polymorphism TSV files from previous STACKS steps datatypes: - tabular - - label: Catalog of loci file generated by cstacks + - label: Catalog of loci TSV file generated by cstacks datatypes: - tabular - - label: Matches-to-catalog file generated by sstacks + - label: Matches-to-catalog TSV file generated by sstacks datatypes: - tabular outputs: @@ -112,7 +112,7 @@ tabs: description_md: | Assemble loci, align reads, and perform variant calling. 
inputs: - - label: BAM alignments + - label: BAM alignment files generated by tsv2bam datatypes: - bam outputs: @@ -124,16 +124,22 @@ tabs: description_md: | Generate population-level statistics and export results for downstream analysis. inputs: - - label: Variant calls / loci + - label: Variant calls and assembled loci generated by gstacks datatypes: - vcf outputs: - - label: Population statistics and export files + - label: Population statistics and export files generated by populations button_md: Run populations button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" - title_md: bcftools filter - Filter variant data description_md: Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, and quality metrics for downstream population analysis. + inputs: + - label: Variant call file generated by NGSEP, gstacks, or another variant calling workflow + datatypes: + - vcf + outputs: + - label: Filtered VCF file for downstream population analysis button_md: Launch Tool button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbcftools_filter%2Fbcftools_filter" From 90270535b92af1acd5f33fe6fbabba1a24fb1d17 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 11:17:34 -0400 Subject: [PATCH 60/94] Improve microbial workflow descriptions and tool clarity Refined microbial section descriptions, inputs, and outputs for better usability and readability. --- agrf/sections/microbial.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index aecbbb5..4de5879 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -7,7 +7,7 @@ tabs: content: - title_md: About the service description_md: | - PacBio HiFi full-length 16S data is quality filtered and denoised to high quality amplicon single variants (ASVs) using QIIME2 and DADA2. 
ASV classification is performed using two approaches. We perform a consensus alignment classification (using VSEARCH) against the Genome Taxonomy Database (GTDB r207). This approach should give high consistency. We also perform naïve Bayesian machine learning based classification (DADA2) using three databases that successively fall over to the next one if a species level match is not found. In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This should give better classification for low abundance ASVs + PacBio HiFi full-length 16S data is quality filtered and denoised to high quality amplicon single variants (ASVs) using QIIME2 and DADA2. ASV classification is performed using two approaches. We perform a consensus alignment classification (using VSEARCH) against the Genome Taxonomy Database (GTDB r207). This approach should give high consistency. We also perform naïve Bayesian machine learning based classification (DADA2) using three databases that successively fall back to the next database if a species level match is not found. In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This should give better classification for low abundance ASVs ##### Results include @@ -30,7 +30,7 @@ tabs: **Number of Reads:** Typically greater than 50K reads per sample. - Read output varies depending on the composition and quality of the sample. Analyses provided by AGRF capps reads at 50K/sample. All reads captured during sequencing are available in the FASTQ files. Reviewing the rarefaction curves can provide insight to whether running an analysis with uncapped reads will provide more information or not. + Read output varies depending on the composition and quality of the sample. 
Analyses provided by AGRF caps reads at 50K/sample. All reads captured during sequencing are available in the FASTQ files. Reviewing the rarefaction curves can provide insight to whether running an analysis with uncapped reads will provide more information or not. - title_md: What files are included? description_md: | @@ -76,7 +76,7 @@ tabs: | Type | Description | | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------- | | `.qza`
QIIME Zipped Artifact | Zip file containing folders for data and data provenance. Can be opened with any zip tool or extracted with QIIME2 exact. | - | `.qzv`
QIIME Zipped Visualization | Zip file containing data and data provenance. Can be visualized with [QIIME 2 View](https://view.qiime2.org/). | + | `.qzv`
QIIME Zipped Visualization | Zip file containing data and data provenance. Can be visualised with [QIIME 2 View](https://view.qiime2.org/). | | `.biom`
Biological Observation Matrix | Can be imported into popular packages such as phyloseq for downstream processing and visualization. (https://biom-format.org/). | | `.tsv`
tab separated values | Table in text file format. Can be opened in any text editor or imported into Excel. | @@ -88,7 +88,7 @@ tabs: content: - title_md: 16S HiFi workflow - description_md: PacBio data + description_md: PacBio HiFi full-length 16S sequencing workflow for microbiome analysis. button_md: Request support button_link: https://usegalaxy.org.au/workflows/run?id=c0a59c4ff4fcc845 @@ -375,7 +375,7 @@ tabs: - title_md: qiime2 diversity pcoa - Principal coordinates analysis description_md: | - Perform principal coordinates analysis (PCoA) on a beta diversity distance matrix to visualize the relationships between samples in a reduced dimensional space. + Perform principal coordinates analysis (PCoA) on a beta diversity distance matrix to visualise the relationships between samples in a reduced dimensional space. inputs: - label: QIIME 2 Artifact file - Distance Matrix From 0a82f07b1cabd687930e5166f2570f778172954c Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 11:18:07 -0400 Subject: [PATCH 61/94] Improve RNA-seq section structure, descriptions, and tool links Added clearer input/output descriptions, filtering, normalization, and visualization sections in the RNA-seq workflow. Updated tool links and improved workflow organisation for better usability and clarity. --- agrf/sections/rnaseq.yml | 48 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 89f4539..0fb7cde 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -53,7 +53,7 @@ tabs: - id: tools title: Tools - content: + content: subsections: - id: qc @@ -188,9 +188,29 @@ tabs: - title_md: Sample filtering description_md: | Remove outlier samples or unwanted groups before analysis. 
+ inputs: + - label: Gene count matrix + datatypes: + - tabular + - label: Sample metadata + datatypes: + - tabular + outputs: + - label: Filtered gene count matrix + button_md: Run Filter + button_link: "{{ galaxy_base_url }}/?tool_id=Filter1" + - title_md: Filter lowly expressed genes description_md: | Remove genes with low counts across samples to improve statistical power in differential expression analysis. + inputs: + - label: Gene count matrix + datatypes: + - tabular + outputs: + - label: Filtered gene count matrix + button_md: Run Filter + button_link: "{{ galaxy_base_url }}/?tool_id=Filter1" - id: normalization title: Normalization @@ -198,7 +218,18 @@ tabs: - title_md: Normalization description_md: | Adjusts for differences in sequencing depth and library size between samples to allow accurate comparison of gene expression. - + inputs: + - label: Gene count matrix + datatypes: + - tabular + - label: Sample metadata + datatypes: + - tabular + outputs: + - label: Normalized expression matrix + button_md: Run DESeq2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fdeseq2%2Fdeseq2" + - id: analysis title: Differential expression content: @@ -238,6 +269,14 @@ tabs: - title_md: PCA - Visualise sample clustering description_md: | Explore sample clustering using principal component analysis. 
+ inputs: + - label: Normalised expression matrix + datatypes: + - tsv + outputs: + - label: PCA plot + button_md: Run PCA + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fggplot2_pca%2Fggplot2_pca" - title_md: Heatmap - Visualise gene expression patterns description_md: | @@ -246,6 +285,8 @@ tabs: - label: Expression matrix datatypes: - tsv + outputs: + - label: Heatmap visualisation button_md: Run Heatmap button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fggplot2_heatmap2%2Fggplot2_heatmap2" @@ -256,10 +297,11 @@ tabs: - label: Differential expression results (TSV) datatypes: - tsv + outputs: + - label: Volcano plot visualisation button_md: Run Volcano plot button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fvolcanoplot%2Fvolcanoplot" - - id: tutorials title: Tutorials heading_md: From 5e3d1d73eb23c3e5fffa5ee85478659d48802369 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 11:29:50 -0400 Subject: [PATCH 62/94] Improve metagenomics workflow descriptions and tool organisation Updated metagenomics section with clearer workflow structure, improved input/output descriptions, corrected datatypes, and added/updated tool links for assembly, binning, taxonomy, and functional annotation workflows. 
--- agrf/sections/metagenomics.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 01a114b..fc2ed88 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -67,7 +67,7 @@ tabs: inputs: - label: Sequencing reads (FASTQ) datatypes: - - fastq + - fastqsanger outputs: - label: Assembled contigs (FASTA) button_md: Run MEGAHIT @@ -79,8 +79,12 @@ tabs: inputs: - label: Long reads (FASTQ) datatypes: - - fastq - + - fastqsanger + outputs: + - label: Assembled contigs (FASTA) + button_md: Run metaSPAdes + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fspades%2Fspades" + - id: assembly_qc title: Assembly quality control content: @@ -106,7 +110,7 @@ tabs: - label: Assembled contigs (FASTA) datatypes: - fasta - - label: BAM files (mapped reads) + - label: BAM files generated by mapping reads back to assembled contigs datatypes: - bam outputs: From 0ed091f429db0453efb558c06c0d04b3021197e2 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 11:30:43 -0400 Subject: [PATCH 63/94] updated RNA section Made few changes --- agrf/sections/rnaseq.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 0fb7cde..e41b5be 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -191,10 +191,10 @@ tabs: inputs: - label: Gene count matrix datatypes: - - tabular + - tsv - label: Sample metadata datatypes: - - tabular + - tsv outputs: - label: Filtered gene count matrix button_md: Run Filter @@ -206,7 +206,7 @@ tabs: inputs: - label: Gene count matrix datatypes: - - tabular + - tsv outputs: - label: Filtered gene count matrix button_md: Run Filter @@ -221,10 +221,10 @@ tabs: inputs: - label: Gene count matrix datatypes: - - tabular + - tsv - label: Sample metadata datatypes: - - tabular + - tsv outputs: - label: Normalized 
expression matrix button_md: Run DESeq2 From c402d5da0f69e6b351729486513c2d2021f3f24b Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 11:40:20 -0400 Subject: [PATCH 64/94] Improved GBS workflow descriptions by clarifying VCF outputs and variant file inputs in STACKS workflow steps. Updated GBS section descriptions to clearly identify VCF outputs from gstacks and VCF inputs used in bcftools filter for improved workflow clarity. --- agrf/sections/gbs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 6174a29..27f1159 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -116,7 +116,7 @@ tabs: datatypes: - bam outputs: - - label: Variant calls and assembled loci generated by gstacks + - label: Variant calls (VCF) and assembled loci generated by gstacks button_md: Run gstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks" @@ -135,7 +135,7 @@ tabs: - title_md: bcftools filter - Filter variant data description_md: Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, and quality metrics for downstream population analysis. inputs: - - label: Variant call file generated by NGSEP, gstacks, or another variant calling workflow + - label: Variant call file (VCF) generated by NGSEP, gstacks, or another variant calling workflow datatypes: - vcf outputs: From 28b44f3fb5eb1654e5581197e4b4b4439d9c1ced Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 12:11:35 -0400 Subject: [PATCH 65/94] Improve metagenomics workflow with taxonomic profiling support Added Kraken2 taxonomic profiling workflow to the metagenomics section and refined workflow organisation, tool descriptions, and input/output clarity for improved usability and consistency. 
--- agrf/sections/metagenomics.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index fc2ed88..99896de 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -99,6 +99,22 @@ tabs: - label: Assembly quality report button_md: Run QUAST button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fquast%2Fquast" + + - id: taxonomicprofiling + title: Taxonomic profiling + content: + + - title_md: Kraken2 - Taxonomic classification + description_md: | + Classify metagenomic reads against reference databases to identify microbial taxa present in the samples. + inputs: + - label: FASTQ reads for metagenomic classification + datatypes: + - fastqsanger + outputs: + - label: Taxonomic classification reports + button_md: Run Kraken2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fkraken2%2Fkraken2" - id: binning title: Binning From 7a9d8133d6e8ca46e3daa14969cd46e4e1b0e295 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 12:17:32 -0400 Subject: [PATCH 66/94] Changed minor errors in microbial Corrected spelling errors --- agrf/sections/microbial.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 4de5879..49a62ad 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -39,7 +39,7 @@ tabs: | **Raw Data** | | | Demultiplexed \*.FASTQ file (1 file per sample) | Contains sequencing reads for each sample | | **Sequencing Metrics** | | - | 📈 samples.demux.summary.qzv | QIIME visualization summarizing.
a) Per-sample sequence counts (inc min, median, mean max, frequency histogram).
b) Quality (interactive plot of read length by quality score show seven-number quality summary plus sequence length summary). | + | 📈 samples.demux.summary.qzv | QIIME visualisation summarizing.
a) Per-sample sequence counts (inc min, median, mean, max, frequency histogram).
b) Quality (interactive plot of read length by quality score show seven-number quality summary plus sequence length summary). | | 🖹 per-sample-fastq-counts.tsv | Per-sample sequence counts | | **Rarefaction** | | | 📈 alpha-rarefaction-curves.qzv | Rarefaction curve showing feature saturation for each sample. | @@ -66,18 +66,18 @@ tabs: | 🖹 unweighted_unifrac_distance_matrix.tsv
🗜️ unweighted_unifrac_distance_matrix.qza | UniFrac metrics are phylogenetically aware, treating closely related organisms as more similar. UniFrac metrics utilize a shared phylogenetic tree to calculate distance is based on the fraction of unshared branch lengths. Unweighted UniFrac uses only the presence or absence of taxa. | | 🖹 weighted_unifrac_distance_matrix.tsv
🗜️ weighted_unifrac_distance_matrix.qza | UniFrac metric additionally weights branch length with the abundance difference between the two samples. | | **PCA results and EMPeror plots** | | - | 🗜️ jaccard_pcoa_results.qza
📈 jaccard_emperor.qzv | PCA results and visualization of the Jaccard distance. | - | 🗜️ bray_curtis_pcoa_results.qza
📈 bray_curtis_emperor.qzv | PCA results and visualization of the Bray-Curtis distance. | - | 🗜️ unweighted_unifrac_pcoa_results.qza
📈 unweighted_unifrac_emperor.qzv | PCA results and visualization of the unweighted UniFrac distance. | - | 🗜️ `weighted_unifrac_pcoa_results.qza`
📈 weighted_unifrac_emperor.qzv | PCA results and visualization of the weighted UniFrac distance. | + | 🗜️ jaccard_pcoa_results.qza
📈 jaccard_emperor.qzv | PCA results and visualisation of the Jaccard distance. | + | 🗜️ bray_curtis_pcoa_results.qza
📈 bray_curtis_emperor.qzv | PCA results and visualisation of the Bray-Curtis distance. | + | 🗜️ unweighted_unifrac_pcoa_results.qza
📈 unweighted_unifrac_emperor.qzv | PCA results and visualisation of the unweighted UniFrac distance. | + | 🗜️ `weighted_unifrac_pcoa_results.qza`
📈 weighted_unifrac_emperor.qzv | PCA results and visualisation of the weighted UniFrac distance. | - title_md: File formats used description_md: | | Type | Description | | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------- | | `.qza`
QIIME Zipped Artifact | Zip file containing folders for data and data provenance. Can be opened with any zip tool or extracted with QIIME2 exact. | - | `.qzv`
QIIME Zipped Visualization | Zip file containing data and data provenance. Can be visualised with [QIIME 2 View](https://view.qiime2.org/). | - | `.biom`
Biological Observation Matrix | Can be imported into popular packages such as phyloseq for downstream processing and visualization. (https://biom-format.org/). | + | `.qzv`
QIIME Zipped Visualisation | Zip file containing data and data provenance. Can be visualised with [QIIME 2 View](https://view.qiime2.org/). | + | `.biom`
Biological Observation Matrix | Can be imported into popular packages such as phyloseq for downstream processing and visualisation. (https://biom-format.org/). | | `.tsv`
tab separated values | Table in text file format. Can be opened in any text editor or imported into Excel. | @@ -141,7 +141,7 @@ tabs: - qza button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__summarize%2Fqiime2__feature_table__summarize" - - title_md: QIIME vizualisation extractor - Visualise .qzv files in Galaxy. + - title_md: QIIME visualisation extractor - Visualise .qzv files in Galaxy. description_md: | Use this tool to visualisae .qzv files within Galaxy. inputs: @@ -150,7 +150,7 @@ tabs: - qzv button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fqiime_extract_viz%2Fqiime_extract_viz" - - title_md: QIMME 2 View - Visualise .qza or .qzv files using an online tool + - title_md: QIIME 2 View - Visualise .qza or .qzv files using an online tool description_md: | Drag and drop QIIME 2 Artifact or Visualisation files from your computer. Best used with .qzv files. While this tool can load .qza files to extract information like citations, providence and metadata, it does not extract the data stored in these files. 
inputs: From dda258e61036e8337544166c7020da0a3f504229 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 12:32:49 -0400 Subject: [PATCH 67/94] minor update --- agrf/sections/rnaseq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index e41b5be..5e7d681 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -90,7 +90,7 @@ tabs: inputs: - label: Sequencing reads (FASTQ) datatypes: - - fastq + - fastqsanger outputs: - label: Trimmed reads (FASTQ) button_md: Run Trim Galore From a64ed46558dc4d7916f9a850422dd347589d9baa Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 13:24:38 -0400 Subject: [PATCH 68/94] Corrected button link for metaSPAdes --- agrf/sections/metagenomics.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 99896de..f4dc314 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -83,7 +83,7 @@ tabs: outputs: - label: Assembled contigs (FASTA) button_md: Run metaSPAdes - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fspades%2Fspades" + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fnml%2Fmetaspades%2Fmetaspades" - id: assembly_qc title: Assembly quality control From cd8758030601de58098e443dec01b93a88094074 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 15:39:01 -0400 Subject: [PATCH 69/94] Improve GBS input file documentation Updated GBS workflow input descriptions with typical STACKS filenames and clarified expected input files across workflow steps. 
--- agrf/sections/gbs.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 27f1159..16aa143 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -67,7 +67,7 @@ tabs: description_md: | Create a catalog of loci across multiple samples. inputs: - - label: Sample loci TSV files generated by ustacks (combined across samples) + - label: Sample loci files from ustacks (sample.tags.tsv.gz) datatypes: - tabular outputs: @@ -79,10 +79,10 @@ tabs: description_md: | Match each sample to the catalog of loci. inputs: - - label: Sample loci TSV files generated by ustacks (per sample) + - label: Sample loci files from ustacks (sample.tags.tsv.gz) datatypes: - tabular - - label: Catalog of loci TSV file generated by cstacks + - label: Catalog loci file from cstacks (catalog.tags.tsv.gz) datatypes: - tabular outputs: @@ -94,13 +94,13 @@ tabs: description_md: | Convert TSV genotype data into BAM format for downstream analysis. inputs: - - label: Loci and polymorphism TSV files from previous STACKS steps + - label: Loci and polymorphism files from STACKS (*.tags.tsv.gz, *.snps.tsv.gz, *.alleles.tsv.gz) datatypes: - tabular - - label: Catalog of loci TSV file generated by cstacks + - label: Catalog loci file from cstacks (catalog.tags.tsv.gz) datatypes: - tabular - - label: Matches-to-catalog TSV file generated by sstacks + - label: Matches-to-catalog file from sstacks (sample.matches.tsv.gz) datatypes: - tabular outputs: From 5dd0990bd580913590eb25e41f36cd00499fa032 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 16:42:52 -0400 Subject: [PATCH 70/94] Create Help and Contact section Consolidated Galaxy help, support, and contact resources into a dedicated Help and Contact section. Added links for Galaxy Training Network, AGRF support, and Galaxy Australia resources. 
--- agrf/sections/help_contact.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 agrf/sections/help_contact.yml diff --git a/agrf/sections/help_contact.yml b/agrf/sections/help_contact.yml new file mode 100644 index 0000000..0a048ed --- /dev/null +++ b/agrf/sections/help_contact.yml @@ -0,0 +1,29 @@ +id: help +title: Help and Contact + +tabs: + - id: overview + title: Overview + heading_md: + + content: + - title_md: Galaxy Training Network + description_md: | + Access Galaxy tutorials, training materials, and workflow documentation for RNA-seq, metagenomics, microbial profiling, and GBS analysis. + + button_md: Open Tutorials + button_link: "https://training.galaxyproject.org/training-material/" + + - title_md: AGRF Support + description_md: | + Contact AGRF for workflow guidance, bioinformatics support, and troubleshooting assistance related to AGRF Galaxy Lab services. + + button_md: AGRF Website + button_link: "https://www.agrf.org.au/" + + - title_md: Galaxy Australia + description_md: | + Access Galaxy Australia workflows, tools, and platform resources for bioinformatics analysis. + + button_md: Open Galaxy Australia + button_link: "https://usegalaxy.org.au/" \ No newline at end of file From a1609353c52a94c9a30e5f491271b7f1504ffe66 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 16:43:25 -0400 Subject: [PATCH 71/94] Clean up Import Data section Removed duplicated help content from the Import Data section. Kept upload and file compression guidance only to simplify the section structure. 
--- agrf/sections/data.yml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/agrf/sections/data.yml b/agrf/sections/data.yml index 550aac9..50a9bce 100644 --- a/agrf/sections/data.yml +++ b/agrf/sections/data.yml @@ -28,16 +28,4 @@ tabs: # button_md: Galaxy tutorials # button_link: "https://training.galaxyproject.org/" - - id: help - title: Help - heading_md: - content: - - - title_md: Galaxy Australia support - description_md: Request support, new tools, or additional storage. - button_md: Request support - button_link: /request/support - - title_md: AGRF support - description_md: Contact AGRF for more help with your data. - button_md: Contact AGRF - button_link: /request/support + \ No newline at end of file From ccbbfe95c832ab56d9c0e00e92821b29257d2bc4 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 16:44:03 -0400 Subject: [PATCH 72/94] Simplify intro and footer content Updated intro navigation links and simplified footer content. Removed unused placeholder logos, extra contact links, and contributor display content for a cleaner interface. --- agrf/templates/conclusion.md | 70 ++---------------------------------- agrf/templates/intro.md | 13 +++---- 2 files changed, 8 insertions(+), 75 deletions(-) diff --git a/agrf/templates/conclusion.md b/agrf/templates/conclusion.md index 966ead6..e6e650a 100644 --- a/agrf/templates/conclusion.md +++ b/agrf/templates/conclusion.md @@ -1,69 +1,5 @@ -Thanks for checking out the Galaxy {{ site_name }} {{ lab_name }}! +Thanks for checking out the Galaxy {{ site_name }} {{ lab_name }}! +AGRF is the [Australian Genome Research Facility](https://www.agrf.org.au/). -

- AGRF is the Australian Genome Research Facility. -
- - Order more sequencing from AGRF - -

- - - [Contact Galaxy Australia](https://site.usegalaxy.org.au/request). - - [Connect with the Australian genomics community](https://www.biocommons.org.au/genomics-domain). - - -
-
- Logo 1 -
-
- Logo 2 -
-
- Logo 3 -
-
- +For support, training resources, or contact information, please use the **Help and Contact** section above. \ No newline at end of file diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index 2d91132..3e55388 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -73,24 +73,21 @@ then add Section, so data becomes dataSection - + - From 51170f55d1b95a5064eb3877d6ea095dfc460b5d Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 16:45:10 -0400 Subject: [PATCH 73/94] Remove old help section and contributor file Removed the previous help.yml section and deleted unused contributor content. Updated section references in base.yml to use the new Help and Contact section. --- agrf/CONTRIBUTORS | 1 - agrf/sections/help.yml | 14 -------------- 2 files changed, 15 deletions(-) delete mode 100644 agrf/CONTRIBUTORS delete mode 100644 agrf/sections/help.yml diff --git a/agrf/CONTRIBUTORS b/agrf/CONTRIBUTORS deleted file mode 100644 index b0047fa..0000000 --- a/agrf/CONTRIBUTORS +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/agrf/sections/help.yml b/agrf/sections/help.yml deleted file mode 100644 index 5ec0827..0000000 --- a/agrf/sections/help.yml +++ /dev/null @@ -1,14 +0,0 @@ -id: help -title: Galaxy Help -tabs: - - id: overview - title: Overview - heading_md: - content: - - title_md: abc - description_md: | - * abc - - - button_md: Upload data - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1" \ No newline at end of file From 271a835b021b475ee9b79dab8843c8bcb617f9ad Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 8 May 2026 16:46:02 -0400 Subject: [PATCH 74/94] Update base.yml section references Updated base.yml configuration files to replace the old help section reference with the new Help and Contact section. 
--- agrf/base.yml | 6 ++++-- agrf/static/local/base.yml | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/agrf/base.yml b/agrf/base.yml index 5f2673f..839ba6b 100644 --- a/agrf/base.yml +++ b/agrf/base.yml @@ -29,8 +29,10 @@ sections: - sections/rnaseq.yml - sections/metagenomics.yml - sections/gbs.yml -# - sections/moreanalysis.yml - sections/learn.yml -# - sections/help.yml + - sections/help_contact.yml +# - sections/moreanalysis.yml + + # - sections/agrf.yml diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index 5f2673f..caed40e 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -29,8 +29,8 @@ sections: - sections/rnaseq.yml - sections/metagenomics.yml - sections/gbs.yml -# - sections/moreanalysis.yml - sections/learn.yml -# - sections/help.yml -# - sections/agrf.yml - + - sections/help_contact.yml +# - sections/moreanalysis.yml + +# From 6e5b0dcd050c278d2c77cd3562524b415ea09fe3 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 14 May 2026 15:44:46 -0400 Subject: [PATCH 75/94] Improve microbial tool descriptions and outputs Updated microbial profiling tool descriptions to better explain tool functionality, expected outputs, and common output file formats. Added clearer user-friendly descriptions for diversity analysis, taxonomy visualisation, ordination, differential abundance, functional prediction, sequence alignment, and phylogenetic analysis tools. --- agrf/sections/microbial.yml | 96 ++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 32 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 49a62ad..d2594e8 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -134,7 +134,9 @@ tabs: - title_md: qiime2 feature-table summarize - Summarize feature table description_md: | - Generate a summary of a feature table, including counts per sample and feature frequency distribution. 
+ Generate a summary of a feature table, including sequence counts per sample, feature frequency distribution, and sequencing depth information. This tool helps assess dataset quality before downstream diversity and taxonomic analysis. + + Output: Interactive summary visualisations and sequencing depth statistics for each sample (.qzv). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: @@ -143,7 +145,9 @@ tabs: - title_md: QIIME visualisation extractor - Visualise .qzv files in Galaxy. description_md: | - Use this tool to visualisae .qzv files within Galaxy. + Extract and visualise the contents of QIIME 2 `.qzv` visualisation files directly within Galaxy. This tool allows users to inspect interactive visualisation outputs without downloading files locally. + + Output: Extracted visualisation contents displayed directly within Galaxy from QIIME visualisation files (.qzv). inputs: - label: QIIME 2 Visualisation file datatypes: @@ -152,7 +156,9 @@ tabs: - title_md: QIIME 2 View - Visualise .qza or .qzv files using an online tool description_md: | - Drag and drop QIIME 2 Artifact or Visualisation files from your computer. Best used with .qzv files. While this tool can load .qza files to extract information like citations, providence and metadata, it does not extract the data stored in these files. + Visualise QIIME 2 `.qza` and `.qzv` files using the online QIIME 2 View platform. This tool helps users explore interactive visualisations, metadata, provenance information, and analysis summaries. + + Output: Interactive online visualisations for QIIME 2 artifact and visualisation files (.qza, .qzv). inputs: - label: QIIME 2 Artifact or visualisation file datatypes: @@ -165,8 +171,9 @@ tabs: - title_md: qiime2 feature-table filter-samples - Filter samples using metadata description_md: | - Filter samples using metadata to remove outliers or exclude specific groups before analysis. 
- + Filter samples from a feature table using metadata criteria to remove unwanted samples or outliers before downstream analysis. This tool is commonly used to retain specific sample groups for diversity and taxonomic analysis. + + Output: A filtered feature table containing only the selected samples (.qza). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: @@ -209,6 +216,7 @@ tabs: Use this tool to calculate other non-phylogenetic alpha diversity metrics. + Output: Alpha diversity values for each sample stored as QIIME 2 artifacts (.qza). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: @@ -235,6 +243,8 @@ tabs: * Faith's Phylogenetic Distance Use this tool to calculate other phylogenetic alpha diversity metrics. + + Output: Phylogenetic diversity values for each sample stored as QIIME 2 artifacts (.qza). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] @@ -245,8 +255,9 @@ tabs: - title_md: qiime2 diversity alpha-correlation - Correlate alpha diversity with sample metadata description_md: | - Determine whether numeric sample metadata columns are correlated with alpha diversity. - + Determine whether numeric sample metadata variables are correlated with alpha diversity values across samples. This analysis helps identify relationships between microbial diversity and experimental or environmental metadata variables. + + Output: Statistical correlation results and visualisations showing relationships between alpha diversity metrics and metadata variables (.qzv). inputs: - label: QIIME 2 Artifact file - Alpha Diversity datatypes: @@ -260,7 +271,9 @@ tabs: - title_md: qiime2 diversity alpha-group-significance - Correlate alpha diversity with groups in sample metadata description_md: | - Visually and statistically compare groups of alpha diversity values. + Statistically compare alpha diversity values between groups defined in the sample metadata. 
This tool helps determine whether microbial diversity differs significantly between experimental groups or sample categories. + + Output: Boxplots and statistical significance results comparing alpha diversity between groups (.qzv). inputs: - label: QIIME 2 Artifact file - Alpha Diversity @@ -280,6 +293,8 @@ tabs: It does this multiple times per depth to account for random variation (controlled by the --p-iterations parameter). The result is a curve for each sample showing diversity vs. sampling effort. + Output: Rarefaction curves showing diversity trends across sequencing depths for each sample (.qzv). + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: @@ -294,14 +309,14 @@ tabs: - title_md: Details description_md: | - Beta diversity measures the diversity *between* samples. There are a number of different metrics used. AGRF's analysis includes four beta diversity metrics (stored within separate .qza files). + Beta diversity measures the diversity *between* samples. There are a number of different metrics used. AGRF's analysis includes four beta diversity metrics (stored within separate .qza files). - `observed_features_vector.qza` - Sample richness per sample. A count of the number of features (i.e. species) observed per sample. - `shannon_vector.qza` - Shannon entropy (i.e. Shannon index) for each sample. This is a quantitative measure of community richness (number of species present) and evenness. Specifically, it quantifies the uncertainty in predicting the species of an individual microbe (or effectively a read) taken at random from the sample. - `evenness_vector.qza` - Pielous evenness index for each sample. A measure of how close in numbers (sequence counts) each species in a sample is. It is the ratio of the Shannon index to the maximum possible Shannon index if every species was equally likely. Value between 0 and 1. The closer to 1 the more even. - `faith_pd_vector.qza` - Faiths phylogenetic distance. 
A phylogenetically aware alpha diversity metric. Equal to the sum of all branch lengths of the phylogenetic tree that spans all members of the sample. The higher the number the greater the diversity. - Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export` tool. The .tsv contains two columns: `[sample name.fastq]` , `[alpha diversity metric]` + Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export` tool. The .tsv contains two columns: `[sample name.fastq]` , `[alpha diversity metric]` inputs: @@ -312,13 +327,9 @@ tabs: - title_md: qiime2 diversity-beta - Calculate beta diversity (non-phylogenetic) description_md: | - Non-phylogenetic beta diversity metrics provide a general overview of diversity based on counts or proportions. Common examples (included in AGRF's analysis) are: - - * Observed features (richness) - * Shannon Index (richness + evenness) - * Pielou's Evenness (how equally distributed species are) - - Use this tool to calculate other non-phylogenetic beta diversity metrics. + Calculate beta diversity metrics to measure differences in microbial community composition between samples. These metrics help identify similarities or differences between microbial communities across samples or experimental groups. + + Output: Beta diversity distance matrices for downstream ordination and clustering analysis (.qza). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] @@ -330,11 +341,11 @@ tabs: - title_md: qiime2 diversity beta-phylogenetic - Calculate beta diversity (with a phylogenetic tree) description_md: | - Phylogenetic beta diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). 
A common example (included in AGRF's analysis) is: + Phylogenetic beta diversity metrics are useful when evolutionary distinctiveness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included in AGRF's analysis) is: - * Faith's Phylogenetic Distance + * Faith's Phylogenetic Distance - Use this tool to calculate other phylogenetic beta diversity metrics. + Use this tool to calculate other phylogenetic beta diversity metrics. inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] @@ -345,8 +356,9 @@ tabs: - title_md: qiime2 diversity beta-correlation - Correlate beta diversity with sample metadata description_md: | - Determine whether numeric sample metadata columns are correlated with beta diversity. - + Determine whether numeric sample metadata variables are correlated with beta diversity differences between samples. This analysis helps identify associations between microbial community composition and metadata variables. + + Output: Statistical correlation results describing relationships between beta diversity distances and metadata variables (.qzv). inputs: - label: QIIME 2 Artifact file - Beta Diversity datatypes: @@ -360,7 +372,9 @@ tabs: - title_md: qiime2 diversity beta-group-significance - Correlate beta diversity with groups in sample metadata description_md: | - Visually and statistically compare groups of beta diversity values. + Statistically compare microbial community composition between groups defined in the sample metadata. This analysis is commonly performed using methods such as PERMANOVA to evaluate whether groups differ significantly in community structure. + + Output: Statistical significance results and visualisations comparing beta diversity between groups (.qzv).
inputs: - label: QIIME 2 Artifact file - Beta Diversity @@ -375,7 +389,9 @@ tabs: - title_md: qiime2 diversity pcoa - Principal coordinates analysis description_md: | - Perform principal coordinates analysis (PCoA) on a beta diversity distance matrix to visualise the relationships between samples in a reduced dimensional space. + Perform principal coordinates analysis (PCoA) on a beta diversity distance matrix to visualise the relationships between samples in a reduced dimensional space. This tool helps identify clustering patterns, sample similarity, and potential separation between experimental groups. + + Output: Principal coordinate values and ordination results for sample clustering visualisation (.qza). inputs: - label: QIIME 2 Artifact file - Distance Matrix @@ -400,7 +416,9 @@ tabs: - title_md: qiime2 composition ancom - Differential abundance of taxa description_md: | - Identify taxa that are differentially abundant between groups using ANCOM. + Identify taxa that are differentially abundant between sample groups using ANCOM. This tool helps detect microbial taxa that show significant abundance differences across experimental conditions or metadata categories. + + Output: Statistical results and visualisations identifying differentially abundant taxa between groups (.qzv). inputs: - label: Feature Table @@ -414,7 +432,9 @@ tabs: - title_md: PICRUSt2 metagenome prediction - Predict microbial functions description_md: | - Predict microbial functional profiles such as gene families and metabolic pathways from microbial community data. + Predict microbial functional profiles from 16S rRNA sequencing data. This tool estimates potential gene families and metabolic pathways based on the observed microbial community composition. + + Output: Predicted functional pathway and gene family abundance tables for downstream analysis (.tsv, .qza). 
inputs: - label: Sequence abundance table (OTUs or ASVs) datatypes: @@ -423,7 +443,9 @@ tabs: - title_md: Krona - Interactive taxonomic visualisation description_md: | - Krona generates interactive hierarchical visualisations of taxonomic abundance. + Generate interactive hierarchical visualisations of microbial taxonomy and abundance data. This tool allows users to explore microbial composition across multiple taxonomic levels. + + Output: Interactive zoomable taxonomic visualisations for microbial community exploration (.html). inputs: - label: Taxonomy classification table datatypes: @@ -433,7 +455,9 @@ tabs: - title_md: qiime2 feature-table heatmap - Visualise feature abundance as a heatmap description_md: | - Generate a heatmap representation of a feature table to visualise abundance patterns across samples. + Visualise feature abundance patterns across samples using a heatmap. This tool helps identify highly abundant features and differences in feature distribution between samples or groups. + + Output: Interactive heatmaps showing feature abundance across samples (.qzv). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: @@ -442,7 +466,9 @@ tabs: - title_md: qiime2 taxa barplot - Visualise taxonomic composition description_md: | - Generate interactive stacked bar plots showing the relative abundance of taxa across samples. + Visualise microbial taxonomic composition and relative abundance across samples. This tool helps compare community structure at different taxonomic levels between samples or groups. + + Output: Interactive stacked bar plots showing taxonomic abundance across samples (.qzv). inputs: - label: Feature table with taxonomy datatypes: @@ -460,19 +486,25 @@ tabs: content: - title_md: BLAST - Identify sequences using database search description_md: | - Compare sequences against reference databases to identify closest matching species. 
+ Compare query sequences against reference databases to identify similar sequences and possible species matches. This tool is useful for validating species identity or checking sequence similarity against known references. + + Output: Alignment results showing matched reference sequences, similarity scores, and sequence identity (.txt, .tsv). button_md: Run BLAST button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/q2d2/qiime2__feature_classifier__classify_consensus_blast/qiime2__feature_classifier__classify_consensus_blast" - title_md: MAFFT - Multiple sequence alignment description_md: | - Align sequences with references for phylogenetic analysis. + Align multiple nucleotide sequences with reference sequences for comparison and phylogenetic analysis. This tool helps identify conserved and variable regions across sequences. + + Output: Aligned sequence files for downstream phylogenetic analysis (.fasta). button_md: Run MAFFT button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/q2d2/qiime2__alignment__mafft/qiime2__alignment__mafft" - title_md: FastTree - Build phylogenetic tree description_md: | - Construct phylogenetic trees to compare samples with known species. + Build approximately maximum-likelihood phylogenetic trees from aligned sequence data. This tool helps compare evolutionary relationships between sequences or expected species. + + Output: Phylogenetic tree files for visualising sequence relatedness (.nwk). 
button_md: Run FastTree button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/iuc/fasttree/fasttree" From 370292766cb909d5193fb1dddd2c36f86ef878df Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Thu, 14 May 2026 16:44:09 -0400 Subject: [PATCH 76/94] Improve RNA-seq tool descriptions and outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated RNA-seq tool descriptions to explain each tool’s purpose, expected outputs, and common output file formats. Improved clarity for QC, trimming, alignment, quantification, filtering, normalisation, differential expression, and visualisation tools. --- agrf/sections/rnaseq.yml | 65 +++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index 5e7d681..d5c98f7 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -61,7 +61,9 @@ tabs: content: - title_md: FastQC - Assess sequencing read quality description_md: | - Assess raw RNASeq read quality including base quality scores, GC content and adapter contamination. + Assess the quality of raw RNA-seq reads before downstream analysis. This tool checks base quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. + + Output: Produces HTML quality reports and summary statistics for each FASTQ file (.html, .zip). inputs: - label: Sequencing reads (FASTQ) datatypes: @@ -73,7 +75,9 @@ tabs: - title_md: MultiQC - Aggregate QC reports description_md: | - Aggregate QC reports across multiple samples into a single summary report. + Combine quality control reports from multiple samples into one summary report. This tool is useful for comparing FastQC, trimming, alignment, and other QC results across all samples in a project. + + Output: Produces a single interactive summary report across samples (.html). 
inputs: - label: FastQC reports datatypes: @@ -86,7 +90,9 @@ tabs: - title_md: Trim Galore - Trim adapters and low-quality bases description_md: | - Remove adapter sequences and low-quality bases from RNASeq reads before alignment. + Remove adapter sequences and low-quality bases from RNA-seq reads before alignment. This step improves read quality and helps reduce alignment errors caused by technical sequence contamination. + + Output: Produces trimmed sequencing reads for downstream analysis (.fastq.gz). inputs: - label: Sequencing reads (FASTQ) datatypes: @@ -98,7 +104,9 @@ tabs: - title_md: sortmerna - Remove rRNA contamination description_md: | - Remove rRNA reads from RNASeq data. + Identify and remove ribosomal RNA reads from RNA-seq data. This step helps reduce unwanted rRNA contamination so that downstream analysis focuses on informative transcriptomic reads. + + Output: Produces filtered sequencing reads with reduced rRNA content (.fastq.gz). inputs: - label: Sequencing reads (FASTQ) datatypes: @@ -108,13 +116,14 @@ tabs: button_md: Run sortmerna button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Frnateam%2Fsortmerna%2Fbg_sortmerna" - - id: alignment title: Alignment content: - title_md: STAR - Align reads to reference genome description_md: | - Align RNASeq reads to a reference genome using STAR. + Align RNA-seq reads to a reference genome using the STAR aligner. This step maps sequencing reads to genomic locations and prepares the data for downstream quantification and transcript analysis. + + Output: Produces aligned read files and alignment statistics (.bam, .sam, .tab). inputs: - label: RNASeq reads (FASTQ) @@ -136,7 +145,9 @@ tabs: content: - title_md: samtools sort - Sort BAM files description_md: | - Sort aligned reads (BAM) by genomic coordinates. + Sort aligned BAM files by genomic coordinates. Sorting is required for many downstream tools including indexing, visualisation, and read quantification workflows. 
+ + Output: Produces sorted alignment files suitable for downstream analysis (.bam). inputs: - label: Aligned reads (BAM) @@ -152,7 +163,9 @@ tabs: content: - title_md: featureCounts - Count reads per gene description_md: | - Convert aligned reads (BAM files) into a gene count matrix required for downstream differential expression analysis. + Count the number of reads assigned to genes using aligned BAM files and a gene annotation file. This step converts mapped reads into a gene-level count matrix for statistical analysis. + + Output: Produces gene count tables for downstream filtering, normalisation, and differential expression analysis (.txt, .tsv). inputs: - label: Aligned reads (BAM) @@ -169,7 +182,9 @@ tabs: - title_md: StringTie - Assemble and quantify transcripts description_md: | - Assemble transcripts and estimate gene expression from aligned RNASeq reads. + Assemble transcripts and estimate transcript abundance from aligned RNA-seq reads. This tool can reconstruct transcript structures and support gene or isoform-level expression analysis. + + Output: Produces transcript assembly and expression estimate files (.gtf, .tab). inputs: - label: Aligned reads (BAM) datatypes: @@ -187,7 +202,9 @@ tabs: content: - title_md: Sample filtering description_md: | - Remove outlier samples or unwanted groups before analysis. + Remove unwanted samples, outliers, or selected sample groups before downstream analysis. This step helps ensure that only relevant samples are included in expression analysis and visualisation. + + Output: Produces filtered count matrices and metadata tables containing selected samples (.tsv, .txt). inputs: - label: Gene count matrix datatypes: @@ -202,7 +219,9 @@ tabs: - title_md: Filter lowly expressed genes description_md: | - Remove genes with low counts across samples to improve statistical power in differential expression analysis. + Remove genes with very low read counts across samples before statistical testing. 
Filtering lowly expressed genes helps reduce noise and improves the reliability of differential expression analysis. + + Output: Produces filtered gene count matrices suitable for downstream analysis (.tsv, .txt). inputs: - label: Gene count matrix datatypes: @@ -217,7 +236,9 @@ tabs: content: - title_md: Normalization description_md: | - Adjusts for differences in sequencing depth and library size between samples to allow accurate comparison of gene expression. + Adjust gene counts to account for differences in sequencing depth and library size between samples. Normalisation makes gene expression values more comparable across samples before visualisation or differential expression analysis. + + Output: Produces normalised expression values or internally normalised count matrices for downstream statistical analysis (.tsv, .txt). inputs: - label: Gene count matrix datatypes: @@ -235,7 +256,9 @@ tabs: content: - title_md: edgeR - Differential expression analysis description_md: | - Differential gene expression analysis. + Perform differential expression analysis to identify genes with significant expression differences between sample groups. edgeR is commonly used for RNA-seq count data and models biological variation between replicates. + + Output: Produces differential expression result tables containing log fold change, p-values, and adjusted significance values (.tsv, .txt). inputs: - label: Gene count matrix datatypes: @@ -250,7 +273,9 @@ tabs: - title_md: DESeq2 - Differential gene expression analysis description_md: | - Identify differentially expressed genes between groups. + Identify differentially expressed genes between sample groups using RNA-seq count data. DESeq2 includes normalisation and statistical testing steps to compare gene expression across experimental conditions. + + Output: Produces differential expression result tables with log fold change, p-values, and adjusted significance values (.tsv, .txt). 
inputs: - label: Gene count matrix datatypes: @@ -268,7 +293,9 @@ tabs: content: - title_md: PCA - Visualise sample clustering description_md: | - Explore sample clustering using principal component analysis. + Explore sample clustering using principal component analysis based on expression values. PCA helps identify major sources of variation, outliers, and whether samples group according to biological or experimental conditions. + + Output: Produces PCA plots showing sample relationships and clustering patterns (.png, .pdf, .html). inputs: - label: Normalised expression matrix datatypes: @@ -280,7 +307,9 @@ tabs: - title_md: Heatmap - Visualise gene expression patterns description_md: | - Display gene expression patterns across samples. + Visualise gene expression patterns across samples using a heatmap. Heatmaps help identify clusters of genes or samples with similar expression profiles. + + Output: Produces heatmap visualisations showing expression patterns across selected genes and samples (.png, .pdf, .html). inputs: - label: Expression matrix datatypes: @@ -292,7 +321,9 @@ tabs: - title_md: Volcano plot - Visualise differential expression results description_md: | - Visualise differential expression results (log fold change vs significance). + Visualise differential expression results by plotting log fold change against statistical significance. Volcano plots help identify genes with large expression changes and strong statistical support. + + Output: Produces volcano plot visualisations highlighting significantly upregulated and downregulated genes (.png, .pdf, .html). inputs: - label: Differential expression results (TSV) datatypes: From 5e7f7ff0756899f1be4f9b413c375d1a43e4e87c Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 10:52:16 -0400 Subject: [PATCH 77/94] Improve metagenomics tool descriptions and outputs Updated metagenomics tool descriptions to better explain tool functionality, expected outputs, and common output file formats. 
Improved user-friendly descriptions for assembly, assembly quality assessment, taxonomic classification, genome binning, genome quality assessment, and functional annotation workflows. --- agrf/sections/metagenomics.yml | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index f4dc314..39f61ae 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -63,7 +63,9 @@ tabs: content: - title_md: MEGAHIT - Assemble short-reads into contigs description_md: | - Assemble short reads into contigs for metagenomic analysis. + Assemble short-read metagenomic sequencing data into longer contiguous sequences (contigs). MEGAHIT is designed for large and complex metagenomic datasets and helps reconstruct microbial genomes from environmental sequencing reads. + + Output: Produces assembled contig sequences for downstream binning and annotation (.fasta). inputs: - label: Sequencing reads (FASTQ) datatypes: @@ -75,7 +77,9 @@ tabs: - title_md: metaSPAdes / meta-hifiasm - Assemble long-reads or hybrid reads description_md: | - Assemble long-read or hybrid metagenomic data (e.g., PacBio). + Assemble long-read or hybrid metagenomic sequencing data into high-quality contigs. These assemblers are designed for complex microbial communities and can improve genome reconstruction using long-read or hybrid sequencing approaches. + + Output: Produces assembled contig sequences and assembly graphs for downstream analysis (.fasta, .gfa). inputs: - label: Long reads (FASTQ) datatypes: @@ -90,7 +94,9 @@ tabs: content: - title_md: QUAST - Assess assembly quality description_md: | - Assess the quality of assembled contigs, including length, completeness and fragmentation. + Assess the quality of metagenomic assemblies by calculating statistics such as contig length, N50, GC content, completeness, and fragmentation. 
This tool helps evaluate whether assembled contigs are suitable for downstream binning and annotation. + + Output: Produces assembly quality reports and summary statistics (.html, .txt, .tsv). inputs: - label: Assembled contigs (FASTA) datatypes: @@ -106,7 +112,9 @@ tabs: - title_md: Kraken2 - Taxonomic classification description_md: | - Classify metagenomic reads against reference databases to identify microbial taxa present in the samples. + Classify metagenomic sequencing reads against reference databases to identify microbial taxa present in the samples. This tool enables rapid taxonomic profiling of microbial communities from environmental or biological samples. + + Output: Produces taxonomic classification reports and abundance summaries (.txt, .tsv). inputs: - label: FASTQ reads for metagenomic classification datatypes: @@ -121,7 +129,9 @@ tabs: content: - title_md: MetaBAT2 - Bin contigs into genomes description_md: | - Group assembled contigs into genome bins representing individual microbial genomes. + Group assembled contigs into genome bins representing individual microbial genomes based on sequence composition and read coverage patterns. This step helps reconstruct metagenome-assembled genomes (MAGs) from complex microbial communities. + + Output: Produces genome bins for downstream quality assessment and taxonomic analysis (.fasta). inputs: - label: Assembled contigs (FASTA) datatypes: @@ -139,7 +149,9 @@ tabs: content: - title_md: CheckM2 - Assess completeness and contamination description_md: | - Assess genome bin quality by estimating completeness and contamination. + Assess the quality of genome bins by estimating genome completeness and contamination. This tool helps determine whether reconstructed microbial genomes are suitable for downstream taxonomic and functional analysis. + + Output: Produces genome quality assessment reports containing completeness and contamination estimates (.tsv, .txt). 
inputs: - label: Genome bins (FASTA) @@ -156,7 +168,9 @@ tabs: content: - title_md: GTDB-Tk - Assign taxonomy to genome bins description_md: | - Assign taxonomy to genome bins using the Genome Taxonomy Database. + Assign taxonomy to genome bins using the Genome Taxonomy Database (GTDB). This tool classifies bacterial and archaeal genomes based on phylogenetic placement and reference genome comparison. + + Output: Produces taxonomic classification tables and phylogenetic placement results (.tsv, .tree). inputs: - label: Genome bins datatypes: @@ -171,8 +185,9 @@ tabs: content: - title_md: Bakta - Functional annotation of genomes description_md: | - Annotate assembled genomes and bins to identify genes and functional features. - This helps in understanding the biological roles of microbial communities. + Annotate assembled genomes and genome bins to identify genes, coding sequences, RNAs, and functional features. This tool helps interpret the biological functions and metabolic potential of microbial communities. + + Output: Produces annotated genome files and functional annotation reports (.gff3, .gbff, .faa, .tsv). inputs: - label: Genome bins or contigs (FASTA) datatypes: @@ -181,6 +196,7 @@ tabs: - label: Annotated genomes and functional features button_md: Run Bakta button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbakta%2Fbakta%2F1.9.4%2Bgalaxy1&version=latest" + - id: tutorials title: Tutorials content: From f5d98f75b536cc90e64f5241db4d21f5004288aa Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 11:06:44 -0400 Subject: [PATCH 78/94] Improve GBS tool descriptions and outputs Updated GBS workflow tool descriptions to better explain tool functionality, expected outputs, and common output file formats. Improved user-friendly descriptions for STACKS population genomics workflow tools including locus assembly, catalogue generation, SNP calling, genotype analysis, and variant filtering. 
--- agrf/sections/gbs.yml | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 16aa143..1b0eccf 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -53,7 +53,9 @@ tabs: - title_md: ustacks - Build loci for each sample description_md: | - Build loci from sequencing reads for each sample. + Assemble short-read sequences into putative loci for each sample and identify SNPs within stacks of matching reads. This is the first step in the STACKS workflow and is used to build loci from raw GBS or ddRAD sequencing reads. + + Output: Produces loci assemblies, SNP calls, haplotype information, and model files for each sample (.tags.tsv.gz, .snps.tsv.gz, .alleles.tsv.gz, .models.tsv.gz). inputs: - label: Demultiplexed FASTQ reads for each sample datatypes: @@ -65,7 +67,9 @@ tabs: - title_md: cstacks - Create catalog of loci description_md: | - Create a catalog of loci across multiple samples. + Create a catalogue of consensus loci by combining loci identified across multiple samples. This catalogue acts as a reference set of loci for downstream matching and population analysis. + + Output: Produces catalogue loci and SNP information shared across samples (.catalog.tags.tsv.gz, .catalog.snps.tsv.gz). inputs: - label: Sample loci files from ustacks (sample.tags.tsv.gz) datatypes: @@ -77,7 +81,9 @@ tabs: - title_md: sstacks - Match samples to catalog description_md: | - Match each sample to the catalog of loci. + Match loci from individual samples against the catalogue of consensus loci generated by cstacks. This step identifies shared loci between samples and the catalogue for downstream population analysis. + + Output: Produces catalogue matching files describing loci matches for each sample (.matches.tsv.gz). 
inputs: - label: Sample loci files from ustacks (sample.tags.tsv.gz) datatypes: @@ -92,7 +98,9 @@ tabs: - title_md: tsv2bam - Convert TSV to BAM description_md: | - Convert TSV genotype data into BAM format for downstream analysis. + Convert STACKS locus information into BAM format while incorporating population map information. This step prepares aligned loci information for downstream genotype calling and population genetics analysis. + + Output: Produces BAM alignment files and associated population mapping information (.bam). inputs: - label: Loci and polymorphism files from STACKS (*.tags.tsv.gz, *.snps.tsv.gz, *.alleles.tsv.gz) datatypes: @@ -110,7 +118,9 @@ tabs: - title_md: gstacks - Assemble loci and call variants description_md: | - Assemble loci, align reads, and perform variant calling. + Assemble loci, align reads, and perform SNP and genotype calling across all samples in the population. This step generates population-level variant information from aligned STACKS data. + + Output: Produces genotype calls, assembled loci, and population-level SNP information (.vcf, .tsv.gz). inputs: - label: BAM alignment files generated by tsv2bam datatypes: @@ -122,7 +132,9 @@ tabs: - title_md: populations - Population genetics analysis description_md: | - Generate population-level statistics and export results for downstream analysis. + Perform population genetics analysis and export filtered variant datasets for downstream analysis. This tool calculates population statistics, applies locus filtering, and prepares data for diversity and population structure analysis. + + Output: Produces filtered SNP datasets, population statistics, and export files for downstream analysis (.vcf, .tsv, .structure, .phylip). 
inputs: - label: Variant calls and assembled loci generated by gstacks datatypes: @@ -133,7 +145,10 @@ tabs: button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" - title_md: bcftools filter - Filter variant data - description_md: Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, and quality metrics for downstream population analysis. + description_md: | + Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, depth, and quality metrics. This step helps retain high-confidence variants for downstream population genetics and diversity analysis. + + Output: Produces filtered variant datasets suitable for downstream analysis (.vcf). inputs: - label: Variant call file (VCF) generated by NGSEP, gstacks, or another variant calling workflow datatypes: From 8228014ab316f4e27e411110eb9250905ebe3ba6 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 11:18:24 -0400 Subject: [PATCH 79/94] Improve import data section descriptions and links Updated the Import Data section with clearer and more user-friendly instructions for uploading and compressing sequencing files in Galaxy. Improved formatting, terminology, and guidance for FASTQ/FASTA file handling and added a working Galaxy compression tool link. --- agrf/sections/data.yml | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/agrf/sections/data.yml b/agrf/sections/data.yml index 50a9bce..b087809 100644 --- a/agrf/sections/data.yml +++ b/agrf/sections/data.yml @@ -1,31 +1,37 @@ id: import title: Import data + tabs: - id: overview title: Overview - heading_md: + heading_md: + content: + - title_md: Get AGRF data into Galaxy description_md: | - * Find your fastq data at the [AGRF Hub](https://www.agrf.org.au/my-agrf-hub). - * Download this data to your computer. - * Upload data from your computer to Galaxy. 
- * Tip: only import the files that you need. + * Find your FASTQ data at the [AGRF Hub](https://www.agrf.org.au/my-agrf-hub). + * Download the required files to your computer. + * Upload data from your computer into Galaxy. + * Tip: only import the files that you need for your analysis. + + *Note: Galaxy and the AGRF Hub are not configured for long-term data storage. Ensure raw and processed data are backed up to a secure archive.* - *Note: Galaxy and the AGRF Hub are not configured for long-term data storage. Ensure you save raw and processed data to a secure archive.* button_md: Upload data button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1" + - title_md: Compress large files description_md: | - * After uploading data, we recommend compressing fastq and fasta files. - * This will save you time and storage space. + * After uploading data, we recommend compressing FASTQ and FASTA files. + * Compression reduces storage usage and can improve upload and download performance. + button_md: Compress data - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=https://toolshed.g2.bx.psu.edu/repos/iuc/compress_file" #does this need additional text eg %2Frepos% etc? + button_link: "https://usegalaxy.org.au/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fcompress_file%2Fcompress_file%2F0.1.0&version=latest" + # - title_md: Learn how to use Galaxy # description_md: | - # * Galaxy Australia is free to use. - # * Get started with these user-friendly tutorials. + # * Galaxy Australia is free to use. + # * Get started with these user-friendly tutorials. 
+ # # button_md: Galaxy tutorials - # button_link: "https://training.galaxyproject.org/" - - \ No newline at end of file + # button_link: "https://training.galaxyproject.org/" \ No newline at end of file From 80d4801dba8dfc040ed26d82b83966d0f76d44e5 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 12:41:36 +0800 Subject: [PATCH 80/94] Improve quality control section descriptions and workflows Updated the Quality Control section with clearer and more user-friendly descriptions for Illumina and PacBio quality control workflows. Improved explanations for FastQC, FastP, format conversion, and HiFi quality control workflows, including expected outputs and common output file formats. --- agrf/sections/qualitycontrol.yml | 89 ++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/agrf/sections/qualitycontrol.yml b/agrf/sections/qualitycontrol.yml index 66f69c3..b3c6cd0 100644 --- a/agrf/sections/qualitycontrol.yml +++ b/agrf/sections/qualitycontrol.yml @@ -1,78 +1,87 @@ + id: qc title: Quality control + tabs: - id: help title: Overview - heading_md: > + heading_md: content: - title_md: What is quality control? description_md: | - Assess the quality and characteristics of your raw sequencing data, and perform data cleaning prior to analysis. + Quality control helps assess raw sequencing data before analysis. It can identify issues such as low-quality reads, adapter contamination, unusual sequence composition, or format problems. + This step helps decide whether data cleaning, trimming, filtering, or format conversion is needed before running downstream workflows. button_md: Quality Control Tutorial button_link: https://training.galaxyproject.org/training-material/topics/sequence-analysis/tutorials/quality-control/tutorial.html + - title_md: What tools and workflows can I use for quality control? description_md: | - * Galaxy has a range of tools available to perform quality control. 
- - * Select your datatype in the tabs above to see relevant tools and workflows. + * Galaxy provides tools and workflows for checking read quality, trimming adapters, filtering low-quality reads, and summarising QC results. + * Select the relevant datatype tab above to view Illumina or PacBio quality control options. + - title_md: Converting data to different formats description_md: | - Some tools require data in different formats. - Learn about converting between bam, fastq and fastqsanger formats, and compressing/uncompressing files. + Some tools require data in specific formats. You may need to convert between BAM, FASTQ, and FASTQ-Sanger formats, or compress and uncompress files before analysis. button_md: Tutorial - button_link: add link + button_link: https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/upload-rules/tutorial.html - id: Illumina title: Illumina data - heading_md: + heading_md: content: - title_md: FastQC - produce sequence quality reports - description_md: > -

- Generate a range of reports about raw data quality and characteristics. -

+ description_md: | + Assess the quality of raw Illumina sequencing reads before downstream analysis. FastQC reports quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. + + Output: Produces per-sample sequence quality reports and summary statistics (.html, .zip). inputs: - datatypes: - fastq - fastq.gz button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" + - title_md: FastP - produce sequence quality reports, trimming & filtering - description_md: > -

- This tool is faster than FastQC, and can also trim reads and filter by quality. -

+ description_md: | + Perform read quality control, adapter trimming, and filtering for Illumina sequencing reads. FastP can remove low-quality bases, trim adapters, filter poor-quality reads, and generate quality reports. + + Output: Produces cleaned FASTQ reads and quality control reports (.fastq.gz, .html, .json). inputs: - datatypes: - fastqsanger - fastqsanger.gz button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastp%2Ffastp" + - title_md: QC of Illumina data - workflow - description_md: > -

- QC of paired-end Illumina data.

Tools: FastQC MultiQC -

+ description_md: | + Run a paired-end Illumina quality control workflow using FastQC and MultiQC. This workflow checks read quality across samples and combines QC results into a single summary report. + + Output: Produces FastQC reports and a combined MultiQC summary report (.html, .zip). button_link: https://genome.usegalaxy.org.au/u/anna/w/qc-illumina-data button_tip: Run workflow - - - id: Pacbio - title: Pacbio data - heading_md: + + - id: Pacbio + title: PacBio data + heading_md: content: - - title_md: Convert BAM files to Fastq - workflow - description_md: > -

- Run this workflow first if your raw data is in css.bam format.

Tools: SamToFastq Samtools flagstat -

- button_link: link - button_tip: Run workflow + - title_md: Convert BAM files to FASTQ - workflow + description_md: | + Convert PacBio raw read files from BAM format into FASTQ format for downstream analysis. + Run this workflow first if your raw PacBio data is supplied in ccs.bam format. + + Tools: SamToFastq Samtools flagstat - - title_md: Remove Hifi adapters - workflow - description_md: > -

- Filter for adapter contamination.

Tools: HiFiAdapterFilt -

- button_link: add link - button_tip: Run workflow + Output: Produces FASTQ reads and alignment summary statistics (.fastq, .txt). + + + - title_md: Remove HiFi adapters - workflow + description_md: | + Detect and remove adapter-contaminated PacBio HiFi reads before downstream assembly or microbial analysis. + This step improves read quality by filtering sequences that may contain residual adapter contamination. + + Tools: HiFiAdapterFilt + + Output: Produces filtered HiFi read files suitable for downstream analysis (.fastq, .fasta). + # - id: Nanopore # title: Nanopore data @@ -100,3 +109,5 @@ tabs: # - fastqsanger # button_link: add tool + + From 3e571dea7b50b23601a0868d275f026f71a41355 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 12:45:26 +0800 Subject: [PATCH 81/94] Improve microbial section descriptions and outputs Updated microbial profiling descriptions, outputs, and datatype information for improved clarity and user guidance. --- agrf/sections/microbial.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index d2594e8..91998a1 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -7,15 +7,15 @@ tabs: content: - title_md: About the service description_md: | - PacBio HiFi full-length 16S data is quality filtered and denoised to high quality amplicon single variants (ASVs) using QIIME2 and DADA2. ASV classification is performed using two approaches. We perform a consensus alignment classification (using VSEARCH) against the Genome Taxonomy Database (GTDB r207). This approach should give high consistency. We also perform naïve Bayesian machine learning based classification (DADA2) using three databases that successively fall back to the next database if a species level match is not found. 
In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This should give better classification for low abundance ASVs + PacBio HiFi full-length 16S data is quality filtered and denoised to high quality amplicon single variants (ASVs) using QIIME2 and DADA2. ASV classification is performed using two approaches. We perform a consensus alignment classification (using VSEARCH) against the Genome Taxonomy Database (GTDB r207). This approach should give high consistency. We also perform naïve Bayesian machine learning based classification (DADA2) using three databases that successively fall back to the next database if a species level match is not found. In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This approach may improve classification sensitivity for low-abundance ASVs. ##### Results include **HiFi read data** - - Demultiplexed .FASTQ files (per sample). + - Demultiplexed FASTQ files (.fastq). - **A summary report (html):** + **A summary report (.html):** - Project summary QC statistics - Table of quality filtered read counts (per sample) @@ -30,7 +30,7 @@ tabs: **Number of Reads:** Typically greater than 50K reads per sample. - Read output varies depending on the composition and quality of the sample. Analyses provided by AGRF caps reads at 50K/sample. All reads captured during sequencing are available in the FASTQ files. Reviewing the rarefaction curves can provide insight to whether running an analysis with uncapped reads will provide more information or not. + Read output varies depending on sample composition and sequencing quality. Analyses provided by AGRF caps reads at 50K/sample. All reads captured during sequencing are available in the FASTQ files. 
Reviewing the rarefaction curves can provide insight into whether running an analysis with uncapped reads will provide more information or not. - title_md: What files are included? description_md: | From 0cfbc5276424000acc245f4108ba0914e2f87c7b Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 13:00:55 +0800 Subject: [PATCH 82/94] Improve metagenomics section descriptions and outputs Updated metagenomics workflow descriptions, outputs, and datatype information for improved clarity, consistency, and user guidance. --- agrf/sections/metagenomics.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index 39f61ae..aaef8a0 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -8,11 +8,11 @@ tabs: - title_md: About the service description_md: | - Illumina metagenomic whole genome sequencing (MetaWGS) is used to characterise microbial communities by sequencing DNA directly from environmental or biological samples. + Illumina metagenomic sequencing (MetaWGS) is used to characterise microbial communities by sequencing DNA directly from environmental or biological samples. Sequencing is performed using paired-end reads (150 bp) on an Illumina NovaSeq platform. Raw sequencing data undergoes quality control and filtering to remove adapter sequences and low-quality bases. - The processed reads are analysed to determine the taxonomic composition and functional potential of the microbial community. 
Taxonomic profiling is performed using Kraken2 and Bracken, enabling classification of reads across different taxonomic levels. Functional profiling is performed using HUMAnN, providing insights into gene families and metabolic pathways present in the samples. The final outputs include read count summaries, taxonomic classifications, functional profiling results, and interactive visualisations, allowing comprehensive analysis of microbial community structure and function. @@ -102,7 +102,7 @@ tabs: datatypes: - fasta outputs: - - label: Assembly quality report + - label: Assembly quality reports (.html, .tsv) button_md: Run QUAST button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fquast%2Fquast" @@ -112,7 +112,7 @@ tabs: - title_md: Kraken2 - Taxonomic classification description_md: | - Classify metagenomic sequencing reads against reference databases to identify microbial taxa present in the samples. This tool enables rapid taxonomic profiling of microbial communities from environmental or biological samples. + Classify metagenomic sequencing reads against reference databases to identify microbial taxa present within the samples. This tool enables rapid taxonomic profiling of microbial communities from environmental or biological samples. Output: Produces taxonomic classification reports and abundance summaries (.txt, .tsv). 
inputs: @@ -140,7 +140,7 @@ tabs: datatypes: - bam outputs: - - label: Genome bins (FASTA) + - label: Genome bins / MAGs (FASTA) button_md: Run MetaBAT2 button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmetabat2%2Fmetabat2" @@ -158,7 +158,7 @@ tabs: datatypes: - fasta outputs: - - label: Completeness and contamination report + - label: Completeness and contamination reports (.tsv) button_md: Run CheckM2 button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fcheckm2%2Fcheckm2" From c7720513474c56d5f953929cb551bae61516b755 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 13:06:32 +0800 Subject: [PATCH 83/94] Improve GBS section descriptions and outputs Updated GBS workflow descriptions, outputs, and datatype information for improved clarity, consistency, and user guidance. --- agrf/sections/gbs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index 1b0eccf..e6945ab 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -61,7 +61,7 @@ tabs: datatypes: - fastqsanger outputs: - - label: Sample loci TSV files generated by ustacks + - label: Sample loci and SNP TSV files generated by ustacks button_md: Run ustacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_ustacks%2Fstacks_ustacks" @@ -81,7 +81,7 @@ tabs: - title_md: sstacks - Match samples to catalog description_md: | - Match loci from individual samples against the catalogue of consensus loci generated by cstacks. This step identifies shared loci between samples and the catalogue for downstream population analysis. + Match loci from individual samples to the catalogue of consensus loci generated by cstacks. This step identifies shared loci between samples and the catalogue for downstream population analysis. Output: Produces catalogue matching files describing loci matches for each sample (.matches.tsv.gz). 
inputs: From 5586d58942007630dabe77b10cac2915d42eb758 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 13:54:10 +0800 Subject: [PATCH 84/94] Minor corrections in GBS section --- agrf/sections/gbs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml index e6945ab..c31fc67 100644 --- a/agrf/sections/gbs.yml +++ b/agrf/sections/gbs.yml @@ -75,7 +75,7 @@ tabs: datatypes: - tabular outputs: - - label: Catalog of loci TSV file generated by cstacks + - label: Catalog of loci and SNP TSV files generated by cstacks button_md: Run cstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_cstacks%2Fstacks_cstacks" @@ -92,7 +92,7 @@ tabs: datatypes: - tabular outputs: - - label: Matches-to-catalog TSV file generated by sstacks + - label: Matches-to-catalog TSV files generated by sstacks button_md: Run sstacks button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" @@ -108,7 +108,7 @@ tabs: - label: Catalog loci file from cstacks (catalog.tags.tsv.gz) datatypes: - tabular - - label: Matches-to-catalog file from sstacks (sample.matches.tsv.gz) + - label: Matches-to-catalog files from sstacks (sample.matches.tsv.gz) datatypes: - tabular outputs: From 44e6a65ecd4615e319181e5552c422d88e35d4b7 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 15 May 2026 16:56:46 +0800 Subject: [PATCH 85/94] Revise sequencing quality control section and update QC tools Updated the Sequencing Quality Control section based on review feedback. Removed broken workflow references, added NanoPlot, MultiQC, and Cutadapt tools, improved descriptions and outputs, added labelled input datatypes, and revised overview text for better clarity and consistency. 
--- agrf/base.yml | 2 +- agrf/sections/qualitycontrol.yml | 113 ----------------------------- agrf/sections/rnaseq.yml | 12 +++- agrf/sections/sequencing_qc.yml | 119 +++++++++++++++++++++++++++++++ agrf/static/local/base.yml | 2 +- agrf/templates/intro.md | 2 +- 6 files changed, 131 insertions(+), 119 deletions(-) delete mode 100644 agrf/sections/qualitycontrol.yml create mode 100644 agrf/sections/sequencing_qc.yml diff --git a/agrf/base.yml b/agrf/base.yml index 839ba6b..90740ba 100644 --- a/agrf/base.yml +++ b/agrf/base.yml @@ -24,7 +24,7 @@ conclusion_md: templates/conclusion.md sections: - sections/data.yml - - sections/qualitycontrol.yml + - sections/sequencing_qc.yml - sections/microbial.yml - sections/rnaseq.yml - sections/metagenomics.yml diff --git a/agrf/sections/qualitycontrol.yml b/agrf/sections/qualitycontrol.yml deleted file mode 100644 index b3c6cd0..0000000 --- a/agrf/sections/qualitycontrol.yml +++ /dev/null @@ -1,113 +0,0 @@ - -id: qc -title: Quality control - -tabs: - - id: help - title: Overview - heading_md: - content: - - title_md: What is quality control? - description_md: | - Quality control helps assess raw sequencing data before analysis. It can identify issues such as low-quality reads, adapter contamination, unusual sequence composition, or format problems. - This step helps decide whether data cleaning, trimming, filtering, or format conversion is needed before running downstream workflows. - button_md: Quality Control Tutorial - button_link: https://training.galaxyproject.org/training-material/topics/sequence-analysis/tutorials/quality-control/tutorial.html - - - title_md: What tools and workflows can I use for quality control? - description_md: | - * Galaxy provides tools and workflows for checking read quality, trimming adapters, filtering low-quality reads, and summarising QC results. - * Select the relevant datatype tab above to view Illumina or PacBio quality control options. 
- - - title_md: Converting data to different formats - description_md: | - Some tools require data in specific formats. You may need to convert between BAM, FASTQ, and FASTQ-Sanger formats, or compress and uncompress files before analysis. - button_md: Tutorial - button_link: https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/upload-rules/tutorial.html - - - id: Illumina - title: Illumina data - heading_md: - content: - - title_md: FastQC - produce sequence quality reports - description_md: | - Assess the quality of raw Illumina sequencing reads before downstream analysis. FastQC reports quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. - - Output: Produces per-sample sequence quality reports and summary statistics (.html, .zip). - inputs: - - datatypes: - - fastq - - fastq.gz - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" - - - title_md: FastP - produce sequence quality reports, trimming & filtering - description_md: | - Perform read quality control, adapter trimming, and filtering for Illumina sequencing reads. FastP can remove low-quality bases, trim adapters, filter poor-quality reads, and generate quality reports. - - Output: Produces cleaned FASTQ reads and quality control reports (.fastq.gz, .html, .json). - inputs: - - datatypes: - - fastqsanger - - fastqsanger.gz - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastp%2Ffastp" - - - title_md: QC of Illumina data - workflow - description_md: | - Run a paired-end Illumina quality control workflow using FastQC and MultiQC. This workflow checks read quality across samples and combines QC results into a single summary report. - - Output: Produces FastQC reports and a combined MultiQC summary report (.html, .zip). 
- button_link: https://genome.usegalaxy.org.au/u/anna/w/qc-illumina-data - button_tip: Run workflow - - - id: Pacbio - title: PacBio data - heading_md: - content: - - title_md: Convert BAM files to FASTQ - workflow - description_md: | - Convert PacBio raw read files from BAM format into FASTQ format for downstream analysis. - Run this workflow first if your raw PacBio data is supplied in ccs.bam format. - - Tools: SamToFastq Samtools flagstat - - Output: Produces FASTQ reads and alignment summary statistics (.fastq, .txt). - - - - title_md: Remove HiFi adapters - workflow - description_md: | - Detect and remove adapter-contaminated PacBio HiFi reads before downstream assembly or microbial analysis. - This step improves read quality by filtering sequences that may contain residual adapter contamination. - - Tools: HiFiAdapterFilt - - Output: Produces filtered HiFi read files suitable for downstream analysis (.fastq, .fasta). - - - # - id: Nanopore - # title: Nanopore data - # heading_md: - # content: - # - title_md: NanoPlot - visualize long read data - # description_md: > - #

- # A plotting suite for long read sequencing data and alignments. - #

- # inputs: - # - datatypes: - # - fastq - # - fastq.gz - # button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fnanoplot%2Fnanoplot" - # button_tip: Run tool - - # - title_md: NanoFilt - filter and trim long read data - # description_md: > - #

- # Filter reads based on quality, length, or GC content. - #

- # inputs: - # - datatypes: - # - fastqsanger - # button_link: add tool - - - diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index d5c98f7..b1fee8b 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -31,7 +31,7 @@ tabs: | Filename | Description | |----------|------------| - | Demultiplexed *.FASTQ files | Raw sequencing reads for each sample | + | Demultiplexed FASTQ files (.fastq.gz) | Raw sequencing reads for each sample | | *.bam | Aligned reads mapped to the reference genome | | *.bam.bai | BAM index files for visualisation | | features_summary.csv | Summary of mapping statistics with genome and exons | @@ -79,10 +79,16 @@ tabs: Output: Produces a single interactive summary report across samples (.html). inputs: - - label: FastQC reports + - label: FastQC reports datatypes: - html - zip + - label: FastP reports + datatypes: + - json + - label: Cutadapt / Trim Galore logs + datatypes: + - txt outputs: - label: Summary QC report (HTML) button_md: Run MultiQC @@ -126,7 +132,7 @@ tabs: Output: Produces aligned read files and alignment statistics (.bam, .sam, .tab). inputs: - - label: RNASeq reads (FASTQ) + - label: RNA-seq reads (FASTQ) datatypes: - fastq - label: Reference genome (fasta) diff --git a/agrf/sections/sequencing_qc.yml b/agrf/sections/sequencing_qc.yml new file mode 100644 index 0000000..ef2d0cf --- /dev/null +++ b/agrf/sections/sequencing_qc.yml @@ -0,0 +1,119 @@ +id: qc +title: Sequencing Quality Control + +tabs: + - id: help + title: Overview + heading_md: + content: + - title_md: What is sequencing quality control? + description_md: | + Sequencing quality control helps assess raw sequencing data before analysis. It can identify issues such as low-quality reads, adapter contamination, unusual sequence composition, or format problems. + + This step helps decide whether trimming, filtering, format conversion, or other cleaning steps are needed before downstream analysis. 
+ button_md: Quality Control Tutorial + button_link: https://training.galaxyproject.org/training-material/topics/sequence-analysis/tutorials/quality-control/tutorial.html + + - title_md: What tools can I use for sequencing quality control? + description_md: | + * Galaxy provides tools for checking read quality, trimming adapters, filtering low-quality reads, and summarising QC results. + * Use the Illumina data tab for short-read quality control tools. + * Use the PacBio data tab for long-read quality control tools. + + - title_md: Converting data to different formats + description_md: | + Some tools require data in specific formats. You may need to convert between BAM, FASTQ, and FASTQ-Sanger formats, or compress and uncompress files before analysis. + button_md: Tutorial + button_link: https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/upload-rules/tutorial.html + + - id: Illumina + title: Illumina data + heading_md: + content: + - title_md: FastQC - produce sequence quality reports + description_md: | + Assess the quality of raw Illumina sequencing reads before downstream analysis. FastQC reports quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. + + Output: Produces sequence quality reports and summary statistics for each sample (.html, .zip). + inputs: + - label: Sequencing reads + datatypes: + - fastq + - fastq.gz + - fastqsanger + - fastqsanger.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" + + - title_md: FastP - produce sequence quality reports, trimming and filtering + description_md: | + Perform read quality control, adapter trimming, and filtering for Illumina sequencing reads. FastP can remove low-quality bases, trim adapters, filter poor-quality reads, and generate quality reports. + + Output: Produces cleaned FASTQ reads and quality control reports (.fastq.gz, .html, .json). 
+ inputs: + - label: Raw Illumina sequencing reads + datatypes: + - fastqsanger + - fastqsanger.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastp%2Ffastp" + + - title_md: Cutadapt - trim adapters and low-quality bases + description_md: | + Remove adapter sequences and low-quality regions from sequencing reads before downstream analysis. This improves read quality and reduces technical sequence contamination. + + Output: Produces trimmed FASTQ reads and trimming reports (.fastq.gz, .txt). + inputs: + - label: Illumina FASTQ reads + datatypes: + - fastqsanger + - fastqsanger.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Flparsons%2Fcutadapt%2Fcutadapt%2F5.2+galaxy0&version=5.2+galaxy0" + + - title_md: MultiQC - summarise quality control reports + description_md: | + Combine quality control results from multiple samples and tools into one summary report. MultiQC is useful for reviewing FastQC, FastP, Cutadapt, and other QC outputs across a whole project. + + Output: Produces an interactive combined quality control summary report (.html). + inputs: + - label: FastQC reports + datatypes: + - html + - zip + + - label: FastP reports + datatypes: + - json + + - label: Cutadapt / Trim Galore logs + datatypes: + - txt + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmultiqc%2Fmultiqc" + + - id: Pacbio + title: PacBio data + heading_md: + content: + - title_md: NanoPlot - visualise long-read sequencing quality + description_md: | + Assess quality metrics for PacBio and Nanopore long-read sequencing datasets. NanoPlot visualises read length distributions, quality scores, and sequencing yield statistics. + + Output: Produces sequencing quality plots and summary reports (.html, .png, .txt). 
+ inputs: + - label: Long-read sequencing data + datatypes: + - fastq + - fastq.gz + - bam + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fnanoplot%2Fnanoplot" + + - title_md: HiFiAdapterFilt - remove PacBio HiFi adapter contamination + description_md: | + Detect and remove adapter-contaminated PacBio HiFi reads before downstream assembly or microbial analysis. This step improves read quality by filtering sequences that may contain residual adapter contamination. + + Output: Produces filtered HiFi read files suitable for downstream analysis (.fastq, .fasta). + inputs: + - label: PacBio HiFi sequencing reads + datatypes: + - fastq + - fastq.gz + - bam + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fgalaxy-australia%2Fhifiadapterfilt%2Fhifiadapterfilt" \ No newline at end of file diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index caed40e..3614a2c 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -24,7 +24,7 @@ conclusion_md: templates/conclusion.md sections: - sections/data.yml - - sections/qualitycontrol.yml + - sections/sequencing_qc.yml - sections/microbial.yml - sections/rnaseq.yml - sections/metagenomics.yml diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index 3e55388..be1a10d 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -63,7 +63,7 @@ then add Section, so data becomes dataSection
From ce6fb27dda96f92f11810a75913bef8985a682c9 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 10:40:05 +0800 Subject: [PATCH 86/94] Add support and service notice to landing page Added a prominent notice to the AGRF Galaxy Lab landing page clarifying that the site provides self-serve Galaxy tools and training resources. The notice directs users to Galaxy Australia for platform and tool support, and to AGRF for complex bioinformatics analyses and service enquiries. --- agrf/templates/intro.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index be1a10d..b855a26 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -1,5 +1,9 @@ Welcome to the Galaxy {{ site_name }} {{ lab_name }}! +
+ Important: This Galaxy Lab provides self-serve links to Galaxy tools and training resources. AGRF does not provide support for running or troubleshooting these tools directly. For Galaxy platform or tool issues, please contact Galaxy Australia support. For complex analyses or custom bioinformatics projects, please contact AGRF to discuss service options. +
+ From 78b9a550b1eaa41db2ac63e1943bc0a8948ae55d Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 11:41:45 +0800 Subject: [PATCH 87/94] Update Microbial Profiling section and workflow support guidance Renamed Microbial section to Microbial Profiling, updated overview content to describe both short-read and long-read microbial profiling approaches, and replaced workflow endorsement with guidance directing workflow support requests to Galaxy Australia. --- agrf/sections/microbial.yml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 91998a1..631ab57 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -1,5 +1,5 @@ id: microbial -title: Microbial profiling - PacBio full length 16S rRNA analysis +title: Microbial profiling tabs: - id: overview title: Overview @@ -7,7 +7,13 @@ tabs: content: - title_md: About the service description_md: | - PacBio HiFi full-length 16S data is quality filtered and denoised to high quality amplicon single variants (ASVs) using QIIME2 and DADA2. ASV classification is performed using two approaches. We perform a consensus alignment classification (using VSEARCH) against the Genome Taxonomy Database (GTDB r207). This approach should give high consistency. We also perform naïve Bayesian machine learning based classification (DADA2) using three databases that successively fall back to the next database if a species level match is not found. In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This approach may improve classification sensitivity for low-abundance ASVs. + Microbial profiling is used to identify and compare microbial communities across samples. 
This can include long-read approaches, such as PacBio HiFi full-length 16S rRNA sequencing, as well as short-read amplicon sequencing approaches. + + Short-read amplicon sequencing is commonly used for targeted microbial community analysis and taxonomic profiling. Long-read approaches, such as PacBio HiFi full-length 16S sequencing, can provide improved taxonomic resolution by sequencing the full-length 16S rRNA gene. + + The tools and resources in this section support exploration of microbial community composition, diversity analysis, taxonomic classification, and downstream visualisation for both short-read and long-read microbial profiling datasets. + + For PacBio HiFi full-length 16S data, reads are quality filtered and denoised to high-quality amplicon sequence variants (ASVs) using QIIME2 and DADA2. ASV classification is performed using two approaches. We perform a consensus alignment classification using VSEARCH against the Genome Taxonomy Database (GTDB r207). This approach provides consistent taxonomic assignment. We also perform naïve Bayesian classification using three databases that successively fall back to the next database if a species-level match is not found. In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This approach may improve classification sensitivity for low-abundance ASVs. ##### Results include @@ -87,10 +93,13 @@ tabs: heading_md: content: - - title_md: 16S HiFi workflow - description_md: PacBio HiFi full-length 16S sequencing workflow for microbiome analysis. - button_md: Request support - button_link: https://usegalaxy.org.au/workflows/run?id=c0a59c4ff4fcc845 + - title_md: Workflow support + description_md: | + AGRF does not provide support for Galaxy workflows listed on this page. 
+ + For workflow-related questions, troubleshooting, or workflow execution support, please contact Galaxy Australia support. + + Complex analyses, custom bioinformatics projects, or AGRF services can be discussed directly with AGRF. - id: tools title: Tools From de125069a1c77d79b374ae6890b0a702ab2276cb Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 12:05:50 +0800 Subject: [PATCH 88/94] Refine microbial profiling and QC content Applied review feedback to the Microbial Profiling and Sequencing Quality Control sections, including workflow support guidance, PacBio QC simplification, SmartLink notes, and updated short-read/long-read descriptions. --- agrf/sections/sequencing_qc.yml | 35 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/agrf/sections/sequencing_qc.yml b/agrf/sections/sequencing_qc.yml index ef2d0cf..328146a 100644 --- a/agrf/sections/sequencing_qc.yml +++ b/agrf/sections/sequencing_qc.yml @@ -17,8 +17,8 @@ tabs: - title_md: What tools can I use for sequencing quality control? description_md: | * Galaxy provides tools for checking read quality, trimming adapters, filtering low-quality reads, and summarising QC results. - * Use the Illumina data tab for short-read quality control tools. - * Use the PacBio data tab for long-read quality control tools. + * Use the Illumina data tab for short-read sequencing quality control tools. + * Use the PacBio data tab for long-read sequencing quality control tools. - title_md: Converting data to different formats description_md: | @@ -30,7 +30,7 @@ tabs: title: Illumina data heading_md: content: - - title_md: FastQC - produce sequence quality reports + - title_md: FastQC - assess sequencing quality description_md: | Assess the quality of raw Illumina sequencing reads before downstream analysis. FastQC reports quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. 
@@ -44,7 +44,7 @@ tabs: - fastqsanger.gz button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" - - title_md: FastP - produce sequence quality reports, trimming and filtering + - title_md: FastP - trim and filter sequencing reads description_md: | Perform read quality control, adapter trimming, and filtering for Illumina sequencing reads. FastP can remove low-quality bases, trim adapters, filter poor-quality reads, and generate quality reports. @@ -92,28 +92,33 @@ tabs: title: PacBio data heading_md: content: - - title_md: NanoPlot - visualise long-read sequencing quality + - title_md: PacBio quality control description_md: | - Assess quality metrics for PacBio and Nanopore long-read sequencing datasets. NanoPlot visualises read length distributions, quality scores, and sequencing yield statistics. + PacBio HiFi sequencing data is typically assessed using PacBio SMRT Link software, which provides sequencing run metrics, read quality summaries, and quality control reports. - Output: Produces sequencing quality plots and summary reports (.html, .png, .txt). + SMRT Link is not currently available within Galaxy. The tools listed below provide basic quality control functionality for PacBio long-read data within Galaxy. + + - title_md: FastQC - basic sequence quality assessment + description_md: | + FastQC can be used to assess basic sequence quality metrics for PacBio FASTQ files, including read quality scores, sequence length distributions, and sequence composition. + + Output: Produces sequence quality reports and summary statistics (.html, .zip).
inputs: - - label: Long-read sequencing data + - label: PacBio FASTQ reads datatypes: - fastq - fastq.gz - - bam - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fnanoplot%2Fnanoplot" + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" - - title_md: HiFiAdapterFilt - remove PacBio HiFi adapter contamination + - title_md: NanoPlot - visualise long-read sequencing quality description_md: | - Detect and remove adapter-contaminated PacBio HiFi reads before downstream assembly or microbial analysis. This step improves read quality by filtering sequences that may contain residual adapter contamination. + Assess quality metrics for PacBio and Nanopore long-read sequencing datasets. NanoPlot visualises read length distributions, quality scores, sequencing yield, and other long-read summary statistics. - Output: Produces filtered HiFi read files suitable for downstream analysis (.fastq, .fasta). + Output: Produces sequencing quality plots and summary reports (.html, .png, .txt). inputs: - - label: PacBio HiFi sequencing reads + - label: Long-read sequencing data datatypes: - fastq - fastq.gz - bam - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fgalaxy-australia%2Fhifiadapterfilt%2Fhifiadapterfilt" \ No newline at end of file + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fnanoplot%2Fnanoplot" \ No newline at end of file From bd5bf52d5baf5aaab7e20362315ea563395b95c6 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 12:44:42 +0800 Subject: [PATCH 89/94] Split metagenomics assembly tools by read type Updated the Metagenomics section by separating short-read and long-read assembly tools. 
Added dedicated sections for MEGAHIT and metaSPAdes (short-read assembly) and meta-hifiasm (long-read assembly) to improve clarity and better reflect different sequencing approaches. --- agrf/sections/metagenomics.yml | 57 +++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml index aaef8a0..c6f36e4 100644 --- a/agrf/sections/metagenomics.yml +++ b/agrf/sections/metagenomics.yml @@ -8,7 +8,7 @@ tabs: - title_md: About the service description_md: | - Illumina metagenomic sequencing (MetaWGS) is used to characterise microbial communities by sequencing DNA directly from environmental or biological samples. + Metagenomic sequencing is used to characterise microbial communities by sequencing DNA directly from environmental or biological samples. Both short-read (Illumina) and long-read sequencing approaches can be used depending on the study objectives. Sequencing is performed using paired-end reads (150 bp) on an Illumina NovaSeq platform. Raw sequencing data undergoes quality control and filtering to remove adapter sequences and low-quality bases. @@ -58,36 +58,65 @@ tabs: content: subsections: - - id: assembly - title: Assembly + - id: short_read_assembly + title: Short-read assembly content: - - title_md: MEGAHIT - Assemble short-reads into contigs + - title_md: MEGAHIT - Assemble short-read metagenomes description_md: | - Assemble short-read metagenomic sequencing data into longer contiguous sequences (contigs). MEGAHIT is designed for large and complex metagenomic datasets and helps reconstruct microbial genomes from environmental sequencing reads. + Assemble short-read metagenomic sequencing data into longer contiguous sequences called contigs. - Output: Produces assembled contig sequences for downstream binning and annotation (.fasta). 
+ MEGAHIT is designed for large and complex Illumina metagenomic datasets and helps reconstruct microbial sequences from short-read data. + + Output: + Produces assembled contig sequences for downstream binning and annotation (.fasta). inputs: - - label: Sequencing reads (FASTQ) + - label: Illumina short-read FASTQ data datatypes: - fastqsanger + - fastqsanger.gz outputs: - - label: Assembled contigs (FASTA) + - label: Assembled contigs (.fasta) button_md: Run MEGAHIT button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmegahit%2Fmegahit" - - title_md: metaSPAdes / meta-hifiasm - Assemble long-reads or hybrid reads + - title_md: metaSPAdes - Assemble short-read metagenomes description_md: | - Assemble long-read or hybrid metagenomic sequencing data into high-quality contigs. These assemblers are designed for complex microbial communities and can improve genome reconstruction using long-read or hybrid sequencing approaches. + Assemble short-read metagenomic sequencing data into contigs using the metaSPAdes assembler. - Output: Produces assembled contig sequences and assembly graphs for downstream analysis (.fasta, .gfa). + metaSPAdes is designed for metagenomic datasets containing mixed microbial communities and is commonly used for Illumina short-read metagenome assembly. + + Output: + Produces assembled contigs and assembly graph files for downstream analysis (.fasta, .gfa). 
inputs: - - label: Long reads (FASTQ) + - label: Illumina short-read FASTQ data datatypes: - fastqsanger + - fastqsanger.gz outputs: - - label: Assembled contigs (FASTA) + - label: Assembled contigs and assembly graph files (.fasta, .gfa) button_md: Run metaSPAdes - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fnml%2Fmetaspades%2Fmetaspades" + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fnml%2Fmetaspades%2Fmetaspades" + + - id: long_read_assembly + title: Long-read assembly + content: + - title_md: meta-hifiasm - Assemble PacBio HiFi metagenomes + description_md: | + Assemble long-read metagenomic sequencing data generated using PacBio HiFi sequencing. + + meta-hifiasm is designed for metagenomic assembly from highly accurate long reads and can generate more contiguous assemblies for complex microbial communities. + + Output: + Produces assembled contigs and assembly graph files for downstream analysis (.fasta, .gfa). + inputs: + - label: PacBio HiFi long-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs and assembly graph files (.fasta, .gfa) + button_md: Run meta-hifiasm + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fgalaxy-australia%2Fhifiasm_meta%2Fhifiasm_meta" - id: assembly_qc title: Assembly quality control From b0755eb7b255f8a2d8042cba9065a3a0f48601a6 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 13:24:48 +0800 Subject: [PATCH 90/94] Refine RNA-seq workflow and replace featureCounts section --- agrf/sections/rnaseq.yml | 72 +++++++++++----------------------------- 1 file changed, 20 insertions(+), 52 deletions(-) diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml index b1fee8b..55fa8f2 100644 --- a/agrf/sections/rnaseq.yml +++ b/agrf/sections/rnaseq.yml @@ -167,50 +167,50 @@ tabs: - id: quantification title: Gene quantification content: - - title_md: featureCounts - Count reads 
per gene + - title_md: StringTie - Assemble and quantify transcripts description_md: | - Count the number of reads assigned to genes using aligned BAM files and a gene annotation file. This step converts mapped reads into a gene-level count matrix for statistical analysis. - - Output: Produces gene count tables for downstream filtering, normalisation, and differential expression analysis (.txt, .tsv). + Assemble transcripts and estimate transcript abundance from aligned RNA-seq reads. This tool can reconstruct transcript structures and support gene or isoform-level expression analysis. + Output: Produces transcript assembly and expression estimate files (.gtf, .tab). inputs: - label: Aligned reads (BAM) datatypes: - bam - - label: Gene annotation file + - label: Reference annotation (GTF) datatypes: - gtf - - gff outputs: - - label: Gene count matrix (TSV) - button_md: Run featureCounts - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" - - - title_md: StringTie - Assemble and quantify transcripts + - label: Transcript assembly (GTF) + button_md: Run StringTie + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstringtie%2Fstringtie" + + - title_md: featureCounts - Count reads per gene description_md: | - Assemble transcripts and estimate transcript abundance from aligned RNA-seq reads. This tool can reconstruct transcript structures and support gene or isoform-level expression analysis. + Count the number of reads assigned to genes using aligned BAM files and a gene annotation file. This step converts mapped reads into a gene-level count matrix for statistical analysis. + + Output: Produces gene count tables for downstream filtering, normalisation, and differential expression analysis (.txt, .tsv). - Output: Produces transcript assembly and expression estimate files (.gtf, .tab). 
inputs: - label: Aligned reads (BAM) datatypes: - bam - - label: Reference annotation (GTF) + - label: Gene annotation file datatypes: - gtf + - gff outputs: - - label: Transcript assembly (GTF) - button_md: Run StringTie - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstringtie%2Fstringtie" + - label: Gene count matrix (TSV) + button_md: Run featureCounts + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" - id: filtering title: Filtering content: - - title_md: Sample filtering + - title_md: Filter samples and lowly expressed genes description_md: | - Remove unwanted samples, outliers, or selected sample groups before downstream analysis. This step helps ensure that only relevant samples are included in expression analysis and visualisation. + Filter count matrices by selecting relevant samples and removing lowly expressed genes before differential expression analysis. - Output: Produces filtered count matrices and metadata tables containing selected samples (.tsv, .txt). + Output: Produces filtered count matrices suitable for downstream statistical analysis (.tsv, .txt). inputs: - label: Gene count matrix datatypes: @@ -220,43 +220,11 @@ tabs: - tsv outputs: - label: Filtered gene count matrix - button_md: Run Filter - button_link: "{{ galaxy_base_url }}/?tool_id=Filter1" - - - title_md: Filter lowly expressed genes - description_md: | - Remove genes with very low read counts across samples before statistical testing. Filtering lowly expressed genes helps reduce noise and improves the reliability of differential expression analysis. - - Output: Produces filtered gene count matrices suitable for downstream analysis (.tsv, .txt). 
- inputs: - - label: Gene count matrix datatypes: - tsv - outputs: - - label: Filtered gene count matrix button_md: Run Filter button_link: "{{ galaxy_base_url }}/?tool_id=Filter1" - - id: normalization - title: Normalization - content: - - title_md: Normalization - description_md: | - Adjust gene counts to account for differences in sequencing depth and library size between samples. Normalisation makes gene expression values more comparable across samples before visualisation or differential expression analysis. - - Output: Produces normalised expression values or internally normalised count matrices for downstream statistical analysis (.tsv, .txt). - inputs: - - label: Gene count matrix - datatypes: - - tsv - - label: Sample metadata - datatypes: - - tsv - outputs: - - label: Normalized expression matrix - button_md: Run DESeq2 - button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fdeseq2%2Fdeseq2" - - id: analysis title: Differential expression content: From c9679383ece878cc3262869adc8ed9ec0ced29bc Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 16:42:04 +0800 Subject: [PATCH 91/94] Update microbial section content Removed workflow support content and updated microbial section structure. --- agrf/sections/microbial.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 631ab57..c364eea 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -87,20 +87,6 @@ tabs: | `.tsv`
tab separated values | Table in text file format. Can be opened in any text editor or imported into Excel. | - - - id: workflows - title: Workflows - heading_md: - content: - - - title_md: Workflow support - description_md: | - AGRF does not provide support for Galaxy workflows listed on this page. - - For workflow-related questions, troubleshooting, or workflow execution support, please contact Galaxy Australia support. - - Complex analyses, custom bioinformatics projects, or AGRF services can be discussed directly with AGRF. - - id: tools title: Tools content: From 54dd6a2fbde98514cb4c2eb34d9ddc6bb9065e60 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 16:42:35 +0800 Subject: [PATCH 92/94] Update AGRF Lab homepage links Added Genome Assembly card and updated homepage navigation links. --- agrf/templates/intro.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index b855a26..98dea80 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -81,6 +81,9 @@ then add Section, so data becomes dataSection +
From 1f5bae080c43f50c2cb21e81ed551a737f64097d Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 16:43:08 +0800 Subject: [PATCH 93/94] Update site navigation Added Genome Assembly section to the AGRF Lab navigation and section list. --- agrf/base.yml | 1 + agrf/static/local/base.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/agrf/base.yml b/agrf/base.yml index 90740ba..792eb0d 100644 --- a/agrf/base.yml +++ b/agrf/base.yml @@ -28,6 +28,7 @@ sections: - sections/microbial.yml - sections/rnaseq.yml - sections/metagenomics.yml + - sections/genomeassembly.yml - sections/gbs.yml - sections/learn.yml - sections/help_contact.yml diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml index 3614a2c..5193a80 100644 --- a/agrf/static/local/base.yml +++ b/agrf/static/local/base.yml @@ -28,6 +28,7 @@ sections: - sections/microbial.yml - sections/rnaseq.yml - sections/metagenomics.yml + - sections/genomeassembly.yml - sections/gbs.yml - sections/learn.yml - sections/help_contact.yml From 67a6028277ff375d9f2b1c6a78d0a34ce0f69347 Mon Sep 17 00:00:00 2001 From: tojoa10 Date: Fri, 29 May 2026 16:43:30 +0800 Subject: [PATCH 94/94] Add Genome Assembly section Added a new Genome Assembly section including overview, tools, and tutorials. Added support for SPAdes, hifiasm, QUAST, BUSCO, Bakta, FastANI, and MUMmer. --- agrf/sections/genomeassembly.yml | 164 +++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 agrf/sections/genomeassembly.yml diff --git a/agrf/sections/genomeassembly.yml b/agrf/sections/genomeassembly.yml new file mode 100644 index 0000000..73e956a --- /dev/null +++ b/agrf/sections/genomeassembly.yml @@ -0,0 +1,164 @@ +id: genomeassembly +title: Genome Assembly + +tabs: + - id: overview + title: Overview + heading_md: + content: + - title_md: About the service + description_md: | + Genome assembly is used to reconstruct draft or complete genomes from sequencing reads. 
This section provides tools for bacterial and small genome assembly using both Illumina short-read and PacBio HiFi long-read sequencing data. + + Short-read assembly tools such as SPAdes are commonly used for bacterial and small genome assembly from Illumina data. Long-read assembly tools such as hifiasm can generate highly contiguous genome assemblies from PacBio HiFi reads. + + Following assembly, quality control and completeness assessment tools can be used to evaluate assembly quality, while annotation tools can identify genes and functional features for downstream comparative genomics analyses. + + - title_md: Results include + description_md: | + - Assembled genome contigs and scaffolds. + - Assembly graph files. + - Assembly quality assessment reports. + - Genome completeness reports. + - Annotated genome files. + - Predicted protein and gene sequence files. + - Genome comparison and similarity reports. + + - title_md: Common file formats + description_md: | + | File type | Description | + | --------- | ----------- | + | `.fastq`, `.fastq.gz` | Raw sequencing reads used as input for assembly. | + | `.fasta`, `.fa`, `.fna` | Assembled contigs, scaffolds, or genome sequences. | + | `.gfa` | Assembly graph file produced by some assemblers. | + | `.gff3`, `.gbff` | Genome annotation files. | + | `.faa`, `.ffn` | Predicted protein and nucleotide gene sequences. | + | `.tsv`, `.txt`, `.html` | Summary reports and quality assessment outputs. | + + - id: tools + title: Tools + heading_md: + content: + subsections: + - id: assembly + title: Assembly + content: + - title_md: SPAdes - Assemble bacterial and small genomes + description_md: | + Assemble Illumina short-read sequencing data into contigs for bacterial and small genome assembly projects. SPAdes is commonly used for de novo assembly of microbial genomes from short-read data. + + Output: Produces assembled contigs and scaffolds for downstream quality control and annotation (.fasta). 
+ inputs: + - label: Illumina short-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs/scaffolds + button_md: Run SPAdes + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fnml%2Fspades%2Fspades%2F4.2.0%2Bgalaxy0&version=latest" + + - title_md: hifiasm - Assemble PacBio HiFi genomes + description_md: | + Assemble PacBio HiFi long-read sequencing data into high-quality genome assemblies. hifiasm is designed for accurate long-read genome assembly and can generate highly contiguous assemblies. + + Output: Produces assembled genome contigs and assembly graph files for downstream analysis (.fasta, .gfa). + inputs: + - label: PacBio HiFi long-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs and assembly graph files + button_md: Run hifiasm + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbgruening%2Fhifiasm%2Fhifiasm" + + - id: assembly_qc + title: Assembly quality control + content: + - title_md: QUAST - Assess assembly quality + description_md: | + Assess genome assembly quality using summary metrics such as contig number, assembly length, N50, GC content, and other assembly statistics. + + Output: Produces assembly quality reports and summary statistics (.html, .txt, .tsv). + inputs: + - label: Assembled contigs/scaffolds + datatypes: + - fasta + outputs: + - label: Assembly quality report + button_md: Run QUAST + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fquast%2Fquast" + + - title_md: BUSCO - Assess genome completeness + description_md: | + Estimate genome assembly completeness by searching for conserved single-copy orthologs expected to be present in a selected lineage. BUSCO helps assess whether a genome assembly is complete and suitable for downstream analysis. + + Output: Produces completeness summaries and BUSCO assessment reports (.txt, .tsv, .json). 
+ inputs: + - label: Genome assembly + datatypes: + - fasta + outputs: + - label: Genome completeness report + button_md: Run BUSCO + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbusco%2Fbusco" + + - id: annotation + title: Annotation + content: + - title_md: Bakta - Annotate bacterial genomes + description_md: | + Annotate bacterial genome assemblies by identifying coding sequences, rRNA genes, tRNA genes, and functional features. Bakta is useful for producing standardised bacterial genome annotations. + + Output: Produces annotated genome files and functional annotation tables (.gff3, .gbff, .faa, .ffn, .tsv). + inputs: + - label: Bacterial genome assembly + datatypes: + - fasta + outputs: + - label: Genome annotation files + button_md: Run Bakta + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbakta%2Fbakta" + + - id: genome_comparison + title: Genome comparison + content: + - title_md: FastANI - Calculate average nucleotide identity + description_md: | + Compare genome assemblies using Average Nucleotide Identity (ANI). FastANI is commonly used to assess genomic similarity between bacterial genomes and can assist with species-level comparisons. + + Output: Produces ANI similarity statistics and comparison reports (.txt, .tsv). + inputs: + - label: Genome assemblies + datatypes: + - fasta + outputs: + - label: ANI comparison report + button_md: Run FastANI + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastani%2Ffastani" + + - title_md: MUMmer - Compare genome assemblies + description_md: | + Compare assembled genomes and identify similarities, differences, and structural variation between assemblies. MUMmer is commonly used for whole-genome alignment and comparative genomics analyses. + + Output: Produces genome alignment and comparison files for downstream visualisation and analysis. 
+ inputs: + - label: Genome assemblies + datatypes: + - fasta + outputs: + - label: Genome comparison results + button_md: Run MUMmer + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmummer_mummer%2Fmummer_mummer%2F4.0.1%2Bgalaxy0&version=latest" + + + - id: tutorials + title: Tutorials + heading_md: + content: + - title_md: Genome assembly training + description_md: | + Learn about genome assembly concepts, assembly quality control, and downstream analysis using Galaxy training resources. + button_md: Galaxy Training + button_link: https://training.galaxyproject.org/training-material/topics/assembly/ \ No newline at end of file