diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..31effd3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +python-3.14.3-amd64 (1).exe +venv +venv311 +.DS_Store diff --git a/agrf/CONTRIBUTORS b/agrf/CONTRIBUTORS deleted file mode 100644 index b0047fa..0000000 --- a/agrf/CONTRIBUTORS +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/agrf/base.yml b/agrf/base.yml index d0a068b..792eb0d 100644 --- a/agrf/base.yml +++ b/agrf/base.yml @@ -5,7 +5,7 @@ site_name: "Australia" lab_name: AGRF Lab #this will be in caps, at right of logo #or: use the word lab, in same font as agrf logo -galaxy_base_url: https://agrf.usegalaxy.org.au +galaxy_base_url: https://usegalaxy.org.au subdomain: agrf root_domain: usegalaxy.org.au @@ -24,10 +24,16 @@ conclusion_md: templates/conclusion.md sections: - sections/data.yml - - sections/qualitycontrol.yml + - sections/sequencing_qc.yml - sections/microbial.yml -# - sections/moreanalysis.yml + - sections/rnaseq.yml + - sections/metagenomics.yml + - sections/genomeassembly.yml + - sections/gbs.yml - sections/learn.yml -# - sections/help.yml + - sections/help_contact.yml +# - sections/moreanalysis.yml + + # - sections/agrf.yml diff --git a/agrf/sections/data.yml b/agrf/sections/data.yml index 550aac9..b087809 100644 --- a/agrf/sections/data.yml +++ b/agrf/sections/data.yml @@ -1,43 +1,37 @@ id: import title: Import data + tabs: - id: overview title: Overview - heading_md: + heading_md: + content: + - title_md: Get AGRF data into Galaxy description_md: | - * Find your fastq data at the [AGRF Hub](https://www.agrf.org.au/my-agrf-hub). - * Download this data to your computer. - * Upload data from your computer to Galaxy. - * Tip: only import the files that you need. + * Find your FASTQ data at the [AGRF Hub](https://www.agrf.org.au/my-agrf-hub). + * Download the required files to your computer. + * Upload data from your computer into Galaxy. + * Tip: only import the files that you need for your analysis. 
+ + *Note: Galaxy and the AGRF Hub are not configured for long-term data storage. Ensure raw and processed data are backed up to a secure archive.* - *Note: Galaxy and the AGRF Hub are not configured for long-term data storage. Ensure you save raw and processed data to a secure archive.* button_md: Upload data button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1" + - title_md: Compress large files description_md: | - * After uploading data, we recommend compressing fastq and fasta files. - * This will save you time and storage space. + * After uploading data, we recommend compressing FASTQ and FASTA files. + * Compression reduces storage usage and can improve upload and download performance. + button_md: Compress data - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=https://toolshed.g2.bx.psu.edu/repos/iuc/compress_file" #does this need additional text eg %2Frepos% etc? + button_link: "https://usegalaxy.org.au/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fcompress_file%2Fcompress_file%2F0.1.0&version=latest" + # - title_md: Learn how to use Galaxy # description_md: | - # * Galaxy Australia is free to use. - # * Get started with these user-friendly tutorials. + # * Galaxy Australia is free to use. + # * Get started with these user-friendly tutorials. + # # button_md: Galaxy tutorials - # button_link: "https://training.galaxyproject.org/" - - - id: help - title: Help - heading_md: - content: - - - title_md: Galaxy Australia support - description_md: Request support, new tools, or additional storage. - button_md: Request support - button_link: /request/support - - title_md: AGRF support - description_md: Contact AGRF for more help with your data. 
- button_md: Contact AGRF - button_link: /request/support + # button_link: "https://training.galaxyproject.org/" \ No newline at end of file diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml new file mode 100644 index 0000000..c31fc67 --- /dev/null +++ b/agrf/sections/gbs.yml @@ -0,0 +1,167 @@ +id: gbs +title: GBS + +tabs: + - id: overview + title: Overview + content: + - title_md: About the service + description_md: | + Genotyping-by-sequencing (GBS) is used to identify genetic variation across multiple samples by sequencing reduced representation regions of the genome generated through restriction enzyme digestion. + + Library preparation is performed using a ddRAD-based protocol, which includes digestion of genomic DNA with restriction enzymes, ligation of barcoded adapters, size selection of fragments, and PCR amplification to generate sequencing-ready libraries. + + Sequencing data is processed using the DeNovoGBS pipeline from NGSEP software. Raw sequencing reads are demultiplexed into individual samples based on barcode sequences. Reads are clustered based on sequence similarity, and variants are detected within each cluster using a probabilistic model. + + Quality control metrics are generated throughout the analysis, including read counts per sample, clustering statistics, and similarity between samples. + + The final outputs include variant calls, consensus sequences for each cluster, demultiplexed sequencing reads, and quality control visualisations, enabling downstream population genetics and diversity analyses. + + - title_md: Sequencing Output + description_md: | + **Number of samples:** 95 samples following demultiplexing + + Raw sequencing reads are provided as compressed FASTQ files for each individual sample after barcode-based demultiplexing. + + - title_md: What files are included? 
+ description_md: | + + | Filename | Description | + |----------|------------| + | *.fastq.gz | Demultiplexed sequencing reads for each sample | + | *_variants.vcf | Variant calls across all samples | + | *.fa | Consensus sequences for each read cluster | + | QC figures (.jpg) | Quality control visualisations (read counts, clustering, IBS heatmap) | + + - title_md: File formats used + description_md: | + + | Type | Description | + |------|------------| + | .fastq.gz | Demultiplexed sequencing reads per sample | + | .vcf | Variant call format for genotype data | + | .fa | FASTA format for consensus sequences | + | .jpg | Quality control figures | + + - id: tools + title: Tools + content: + subsections: + - id: stacks + title: STACKS workflow + content: + + - title_md: ustacks - Build loci for each sample + description_md: | + Assemble short-read sequences into putative loci for each sample and identify SNPs within stacks of matching reads. This is the first step in the STACKS workflow and is used to build loci from raw GBS or ddRAD sequencing reads. + + Output: Produces loci assemblies, SNP calls, haplotype information, and model files for each sample (.tags.tsv.gz, .snps.tsv.gz, .alleles.tsv.gz, .models.tsv.gz). + inputs: + - label: Demultiplexed FASTQ reads for each sample + datatypes: + - fastqsanger + outputs: + - label: Sample loci and SNP TSV files generated by ustacks + button_md: Run ustacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_ustacks%2Fstacks_ustacks" + + - title_md: cstacks - Create catalog of loci + description_md: | + Create a catalogue of consensus loci by combining loci identified across multiple samples. This catalogue acts as a reference set of loci for downstream matching and population analysis. + + Output: Produces catalogue loci and SNP information shared across samples (.catalog.tags.tsv.gz, .catalog.snps.tsv.gz). 
+ inputs: + - label: Sample loci files from ustacks (sample.tags.tsv.gz) + datatypes: + - tabular + outputs: + - label: Catalog of loci and SNP TSV files generated by cstacks + button_md: Run cstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_cstacks%2Fstacks_cstacks" + + - title_md: sstacks - Match samples to catalog + description_md: | + Match loci from individual samples to the catalogue of consensus loci generated by cstacks. This step identifies shared loci between samples and the catalogue for downstream population analysis. + + Output: Produces catalogue matching files describing loci matches for each sample (.matches.tsv.gz). + inputs: + - label: Sample loci files from ustacks (sample.tags.tsv.gz) + datatypes: + - tabular + - label: Catalog loci file from cstacks (catalog.tags.tsv.gz) + datatypes: + - tabular + outputs: + - label: Matches-to-catalog TSV files generated by sstacks + button_md: Run sstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" + + - title_md: tsv2bam - Convert TSV to BAM + description_md: | + Convert STACKS locus information into BAM format while incorporating population map information. This step prepares aligned loci information for downstream genotype calling and population genetics analysis. + + Output: Produces BAM alignment files and associated population mapping information (.bam). 
+ inputs: + - label: Loci and polymorphism files from STACKS (*.tags.tsv.gz, *.snps.tsv.gz, *.alleles.tsv.gz) + datatypes: + - tabular + - label: Catalog loci file from cstacks (catalog.tags.tsv.gz) + datatypes: + - tabular + - label: Matches-to-catalog files from sstacks (sample.matches.tsv.gz) + datatypes: + - tabular + outputs: + - label: BAM alignment files generated by tsv2bam + button_md: Run tsv2bam + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_tsv2bam%2Fstacks2_tsv2bam" + + - title_md: gstacks - Assemble loci and call variants + description_md: | + Assemble loci, align reads, and perform SNP and genotype calling across all samples in the population. This step generates population-level variant information from aligned STACKS data. + + Output: Produces genotype calls, assembled loci, and population-level SNP information (.vcf, .tsv.gz). + inputs: + - label: BAM alignment files generated by tsv2bam + datatypes: + - bam + outputs: + - label: Variant calls (VCF) and assembled loci generated by gstacks + button_md: Run gstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks" + + - title_md: populations - Population genetics analysis + description_md: | + Perform population genetics analysis and export filtered variant datasets for downstream analysis. This tool calculates population statistics, applies locus filtering, and prepares data for diversity and population structure analysis. + + Output: Produces filtered SNP datasets, population statistics, and export files for downstream analysis (.vcf, .tsv, .structure, .phylip). 
+ inputs: + - label: Variant calls and assembled loci generated by gstacks + datatypes: + - vcf + outputs: + - label: Population statistics and export files generated by populations + button_md: Run populations + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" + + - title_md: bcftools filter - Filter variant data + description_md: | + Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, depth, and quality metrics. This step helps retain high-confidence variants for downstream population genetics and diversity analysis. + + Output: Produces filtered variant datasets suitable for downstream analysis (.vcf). + inputs: + - label: Variant call file (VCF) generated by NGSEP, gstacks, or another variant calling workflow + datatypes: + - vcf + outputs: + - label: Filtered VCF file for downstream population analysis + button_md: Launch Tool + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbcftools_filter%2Fbcftools_filter" + + - id: tutorials + title: Tutorials + content: + - title_md: GBS analysis tutorials + description_md: Explore Galaxy Training Network tutorials for variant analysis workflows relevant to GBS and population genomics studies. + button_md: Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/ diff --git a/agrf/sections/genomeassembly.yml b/agrf/sections/genomeassembly.yml new file mode 100644 index 0000000..73e956a --- /dev/null +++ b/agrf/sections/genomeassembly.yml @@ -0,0 +1,164 @@ +id: genomeassembly +title: Genome Assembly + +tabs: + - id: overview + title: Overview + heading_md: + content: + - title_md: About the service + description_md: | + Genome assembly is used to reconstruct draft or complete genomes from sequencing reads. 
This section provides tools for bacterial and small genome assembly using both Illumina short-read and PacBio HiFi long-read sequencing data. + + Short-read assembly tools such as SPAdes are commonly used for bacterial and small genome assembly from Illumina data. Long-read assembly tools such as hifiasm can generate highly contiguous genome assemblies from PacBio HiFi reads. + + Following assembly, quality control and completeness assessment tools can be used to evaluate assembly quality, while annotation tools can identify genes and functional features for downstream comparative genomics analyses. + + - title_md: Results include + description_md: | + - Assembled genome contigs and scaffolds. + - Assembly graph files. + - Assembly quality assessment reports. + - Genome completeness reports. + - Annotated genome files. + - Predicted protein and gene sequence files. + - Genome comparison and similarity reports. + + - title_md: Common file formats + description_md: | + | File type | Description | + | --------- | ----------- | + | `.fastq`, `.fastq.gz` | Raw sequencing reads used as input for assembly. | + | `.fasta`, `.fa`, `.fna` | Assembled contigs, scaffolds, or genome sequences. | + | `.gfa` | Assembly graph file produced by some assemblers. | + | `.gff3`, `.gbff` | Genome annotation files. | + | `.faa`, `.ffn` | Predicted protein and nucleotide gene sequences. | + | `.tsv`, `.txt`, `.html` | Summary reports and quality assessment outputs. | + + - id: tools + title: Tools + heading_md: + content: + subsections: + - id: assembly + title: Assembly + content: + - title_md: SPAdes - Assemble bacterial and small genomes + description_md: | + Assemble Illumina short-read sequencing data into contigs for bacterial and small genome assembly projects. SPAdes is commonly used for de novo assembly of microbial genomes from short-read data. + + Output: Produces assembled contigs and scaffolds for downstream quality control and annotation (.fasta). 
+ inputs: + - label: Illumina short-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs/scaffolds + button_md: Run SPAdes + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fnml%2Fspades%2Fspades%2F4.2.0%2Bgalaxy0&version=latest" + + - title_md: hifiasm - Assemble PacBio HiFi genomes + description_md: | + Assemble PacBio HiFi long-read sequencing data into high-quality genome assemblies. hifiasm is designed for accurate long-read genome assembly and can generate highly contiguous assemblies. + + Output: Produces assembled genome contigs and assembly graph files for downstream analysis (.fasta, .gfa). + inputs: + - label: PacBio HiFi long-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs and assembly graph files + button_md: Run hifiasm + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbgruening%2Fhifiasm%2Fhifiasm" + + - id: assembly_qc + title: Assembly quality control + content: + - title_md: QUAST - Assess assembly quality + description_md: | + Assess genome assembly quality using summary metrics such as contig number, assembly length, N50, GC content, and other assembly statistics. + + Output: Produces assembly quality reports and summary statistics (.html, .txt, .tsv). + inputs: + - label: Assembled contigs/scaffolds + datatypes: + - fasta + outputs: + - label: Assembly quality report + button_md: Run QUAST + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fquast%2Fquast" + + - title_md: BUSCO - Assess genome completeness + description_md: | + Estimate genome assembly completeness by searching for conserved single-copy orthologs expected to be present in a selected lineage. BUSCO helps assess whether a genome assembly is complete and suitable for downstream analysis. + + Output: Produces completeness summaries and BUSCO assessment reports (.txt, .tsv, .json). 
+ inputs: + - label: Genome assembly + datatypes: + - fasta + outputs: + - label: Genome completeness report + button_md: Run BUSCO + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbusco%2Fbusco" + + - id: annotation + title: Annotation + content: + - title_md: Bakta - Annotate bacterial genomes + description_md: | + Annotate bacterial genome assemblies by identifying coding sequences, rRNA genes, tRNA genes, and functional features. Bakta is useful for producing standardised bacterial genome annotations. + + Output: Produces annotated genome files and functional annotation tables (.gff3, .gbff, .faa, .ffn, .tsv). + inputs: + - label: Bacterial genome assembly + datatypes: + - fasta + outputs: + - label: Genome annotation files + button_md: Run Bakta + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbakta%2Fbakta" + + - id: genome_comparison + title: Genome comparison + content: + - title_md: FastANI - Calculate average nucleotide identity + description_md: | + Compare genome assemblies using Average Nucleotide Identity (ANI). FastANI is commonly used to assess genomic similarity between bacterial genomes and can assist with species-level comparisons. + + Output: Produces ANI similarity statistics and comparison reports (.txt, .tsv). + inputs: + - label: Genome assemblies + datatypes: + - fasta + outputs: + - label: ANI comparison report + button_md: Run FastANI + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastani%2Ffastani" + + - title_md: MUMmer - Compare genome assemblies + description_md: | + Compare assembled genomes and identify similarities, differences, and structural variation between assemblies. MUMmer is commonly used for whole-genome alignment and comparative genomics analyses. + + Output: Produces genome alignment and comparison files for downstream visualisation and analysis. 
+ inputs: + - label: Genome assemblies + datatypes: + - fasta + outputs: + - label: Genome comparison results + button_md: Run MUMmer + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmummer_mummer%2Fmummer_mummer%2F4.0.1%2Bgalaxy0&version=latest" + + + - id: tutorials + title: Tutorials + heading_md: + content: + - title_md: Genome assembly training + description_md: | + Learn about genome assembly concepts, assembly quality control, and downstream analysis using Galaxy training resources. + button_md: Galaxy Training + button_link: https://training.galaxyproject.org/training-material/topics/assembly/ \ No newline at end of file diff --git a/agrf/sections/help.yml b/agrf/sections/help.yml deleted file mode 100644 index 5ec0827..0000000 --- a/agrf/sections/help.yml +++ /dev/null @@ -1,14 +0,0 @@ -id: help -title: Galaxy Help -tabs: - - id: overview - title: Overview - heading_md: - content: - - title_md: abc - description_md: | - * abc - - - button_md: Upload data - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1" \ No newline at end of file diff --git a/agrf/sections/help_contact.yml b/agrf/sections/help_contact.yml new file mode 100644 index 0000000..0a048ed --- /dev/null +++ b/agrf/sections/help_contact.yml @@ -0,0 +1,29 @@ +id: help +title: Help and Contact + +tabs: + - id: overview + title: Overview + heading_md: + + content: + - title_md: Galaxy Training Network + description_md: | + Access Galaxy tutorials, training materials, and workflow documentation for RNA-seq, metagenomics, microbial profiling, and GBS analysis. + + button_md: Open Tutorials + button_link: "https://training.galaxyproject.org/training-material/" + + - title_md: AGRF Support + description_md: | + Contact AGRF for workflow guidance, bioinformatics support, and troubleshooting assistance related to AGRF Galaxy Lab services. 
+ + button_md: AGRF Website + button_link: "https://www.agrf.org.au/" + + - title_md: Galaxy Australia + description_md: | + Access Galaxy Australia workflows, tools, and platform resources for bioinformatics analysis. + + button_md: Open Galaxy Australia + button_link: "https://usegalaxy.org.au/" \ No newline at end of file diff --git a/agrf/sections/learn.yml b/agrf/sections/learn.yml index e756b22..2fc2617 100644 --- a/agrf/sections/learn.yml +++ b/agrf/sections/learn.yml @@ -1,14 +1,58 @@ id: learn title: Learn Galaxy + tabs: - id: overview title: Overview - heading_md: + heading_md: + Learn how to use Galaxy through tutorials, workflows, and official documentation. content: - - title_md: abc + - title_md: Galaxy Training Resources + description_md: | + Learn how to use Galaxy through step-by-step tutorials from the Galaxy Training Network. + These resources cover a wide range of bioinformatics workflows including RNA-seq, + microbiome analysis, metagenomics, and variant analysis. + button_md: Browse Tutorials + button_link: https://training.galaxyproject.org/training-material/ + + - title_md: Variant Analysis Tutorials + description_md: | + Explore tutorials for SNP calling, variant analysis, and related workflows + relevant to GBS and population genomics studies. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/ + + - title_md: Microbiome Tutorials + description_md: | + Learn microbial community analysis workflows in Galaxy, including QIIME 2 based + approaches for diversity analysis and taxonomic profiling. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/microbiome/ + + - title_md: RNA-seq Tutorials + description_md: | + Learn RNA-seq analysis workflows in Galaxy, including alignment, quantification, + and differential expression analysis. 
+ button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/transcriptomics/ + + - title_md: Metagenomics Tutorials description_md: | - * abc + Explore tutorials for metagenome assembly, classification, and downstream + analysis workflows in Galaxy. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/metagenomics/ - - button_md: Upload data - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1" \ No newline at end of file + - title_md: GBS and Population Genomics Tutorials + description_md: | + Learn workflows relevant to Genotyping-by-Sequencing (GBS), including variant calling, + SNP analysis, and population genomics approaches in Galaxy. + button_md: View Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/ + + - title_md: Galaxy Help and Documentation + description_md: | + Access official Galaxy documentation, user guides, and help resources to understand + tools, workflows, and data management in Galaxy. + button_md: Open Documentation + button_link: https://galaxyproject.org/learn/ \ No newline at end of file diff --git a/agrf/sections/metagenomics.yml b/agrf/sections/metagenomics.yml new file mode 100644 index 0000000..c6f36e4 --- /dev/null +++ b/agrf/sections/metagenomics.yml @@ -0,0 +1,235 @@ +id: metagenomics +title: Metagenomics + +tabs: + - id: overview + title: Overview + content: + + - title_md: About the service + description_md: | + Metagenomic sequencing is used to characterise microbial communities by sequencing DNA directly from environmental or biological samples. Both short-read (Illumina) and long-read sequencing approaches can be used depending on the study objectives. + + Sequencing is performed using paired-end reads (150 bp) on an Illumina NovaSeq platform. Raw sequencing data undergoes quality control and filtering to remove adapter sequences and low-quality bases. 
+ + The processed reads are analysed to determine the taxonomic composition and functional potential of the microbial community. Taxonomic profiling is performed using Kraken2 and Bracken, enabling classification of reads across different taxonomic levels. Functional profiling is performed using HUMAnN, providing insights into gene families and metabolic pathways present in the samples. + + The final outputs include read count summaries, taxonomic classifications, functional profiling results, and interactive visualisations, allowing comprehensive analysis of microbial community structure and function. + + - title_md: Sequencing Output + description_md: | + **Read length:** 150 bp paired-end reads + + **Number of reads:** Varies per sample + + Raw sequencing reads are provided as FASTQ files containing sequence and quality score information. + + - title_md: What files are included? + description_md: | + + | Filename | Description | + |----------|------------| + | readCountsTable.tsv | Read counts for each sample across processing steps (raw, trimmed, classified) | + | taxonomic_profiling/{sample_name}/{sample_name}.kraken.report | Taxonomic classification results from Kraken2 | + | taxonomic_profiling/{sample_name}/{sample_name}.bracken.tsv | Refined taxonomic abundance estimates from Bracken | + | taxonomic_profiling/{sample_name}/{sample_name}.html | Interactive Krona plot for taxonomic composition | + | taxonomic_profiling/absolute_abundance.xlsx | Absolute abundance of taxa across samples | + | taxonomic_profiling/relative_abundance.xlsx | Relative abundance of taxa across samples | + | taxonomic_profiling/barplot/index.html | Interactive barplot visualisation of taxonomic composition | + | functional_profiling/{sample_name}_genefamilies.tsv | Gene family abundance per sample | + | functional_profiling/{sample_name}_pathabundance.tsv | Pathway abundance per sample | + | functional_profiling/{sample_name}_pathcoverage.tsv | Pathway coverage per sample | + | 
functional_profiling/humann2_genefamilies.tsv | Gene family count matrix across samples | + | functional_profiling/humann2_pathabundance.tsv | Pathway abundance matrix across samples | + | functional_profiling/humann2_pathcoverage.tsv | Pathway coverage matrix across samples | + + - title_md: File formats used + description_md: | + + | Type | Description | + |------|------------| + | .fastq / .fastq.gz | Raw sequencing reads | + | .tsv | Tab-separated files for read counts, taxonomy, and functional profiles | + | .xlsx | Excel files for abundance tables | + | .html | Interactive visualisations (Krona plots, barplots) | + + - id: tools + title: Tools + content: + subsections: + + - id: short_read_assembly + title: Short-read assembly + content: + - title_md: MEGAHIT - Assemble short-read metagenomes + description_md: | + Assemble short-read metagenomic sequencing data into longer contiguous sequences called contigs. + + MEGAHIT is designed for large and complex Illumina metagenomic datasets and helps reconstruct microbial sequences from short-read data. + + Output: + Produces assembled contig sequences for downstream binning and annotation (.fasta). + inputs: + - label: Illumina short-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs (.fasta) + button_md: Run MEGAHIT + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmegahit%2Fmegahit" + + - title_md: metaSPAdes - Assemble short-read metagenomes + description_md: | + Assemble short-read metagenomic sequencing data into contigs using the metaSPAdes assembler. + + metaSPAdes is designed for metagenomic datasets containing mixed microbial communities and is commonly used for Illumina short-read metagenome assembly. + + Output: + Produces assembled contigs and assembly graph files for downstream analysis (.fasta, .gfa). 
+ inputs: + - label: Illumina short-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs and assembly graph files (.fasta, .gfa) + button_md: Run metaSPAdes + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fnml%2Fmetaspades%2Fmetaspades" + + - id: long_read_assembly + title: Long-read assembly + content: + - title_md: meta-hifiasm - Assemble PacBio HiFi metagenomes + description_md: | + Assemble long-read metagenomic sequencing data generated using PacBio HiFi sequencing. + + meta-hifiasm is designed for metagenomic assembly from highly accurate long reads and can generate more contiguous assemblies for complex microbial communities. + + Output: + Produces assembled contigs and assembly graph files for downstream analysis (.fasta, .gfa). + inputs: + - label: PacBio HiFi long-read FASTQ data + datatypes: + - fastqsanger + - fastqsanger.gz + outputs: + - label: Assembled contigs and assembly graph files (.fasta, .gfa) + button_md: Run meta-hifiasm + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fgalaxy-australia%2Fhifiasm_meta%2Fhifiasm_meta" + + - id: assembly_qc + title: Assembly quality control + content: + - title_md: QUAST - Assess assembly quality + description_md: | + Assess the quality of metagenomic assemblies by calculating statistics such as contig length, N50, GC content, completeness, and fragmentation. This tool helps evaluate whether assembled contigs are suitable for downstream binning and annotation. + + Output: Produces assembly quality reports and summary statistics (.html, .txt, .tsv). 
+ inputs: + - label: Assembled contigs (FASTA) + datatypes: + - fasta + outputs: + - label: Assembly quality reports (.html, .tsv) + button_md: Run QUAST + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fquast%2Fquast" + + - id: taxonomicprofiling + title: Taxonomic profiling + content: + + - title_md: Kraken2 - Taxonomic classification + description_md: | + Classify metagenomic sequencing reads against reference databases to identify microbial taxa present within the samples. This tool enables rapid taxonomic profiling of microbial communities from environmental or biological samples. + + Output: Produces taxonomic classification reports and abundance summaries (.txt, .tsv). + inputs: + - label: FASTQ reads for metagenomic classification + datatypes: + - fastqsanger + outputs: + - label: Taxonomic classification reports + button_md: Run Kraken2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fkraken2%2Fkraken2" + + - id: binning + title: Binning + content: + - title_md: MetaBAT2 - Bin contigs into genomes + description_md: | + Group assembled contigs into genome bins representing individual microbial genomes based on sequence composition and read coverage patterns. This step helps reconstruct metagenome-assembled genomes (MAGs) from complex microbial communities. + + Output: Produces genome bins for downstream quality assessment and taxonomic analysis (.fasta). 
+ inputs: + - label: Assembled contigs (FASTA) + datatypes: + - fasta + - label: BAM files generated by mapping reads back to assembled contigs + datatypes: + - bam + outputs: + - label: Genome bins / MAGs (FASTA) + button_md: Run MetaBAT2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmetabat2%2Fmetabat2" + + - id: binning_qc + title: Binning quality control + content: + - title_md: CheckM2 - Assess completeness and contamination + description_md: | + Assess the quality of genome bins by estimating genome completeness and contamination. This tool helps determine whether reconstructed microbial genomes are suitable for downstream taxonomic and functional analysis. + + Output: Produces genome quality assessment reports containing completeness and contamination estimates (.tsv, .txt). + + inputs: + - label: Genome bins (FASTA) + datatypes: + - fasta + outputs: + - label: Completeness and contamination reports (.tsv) + button_md: Run CheckM2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fcheckm2%2Fcheckm2" + + + - id: taxonomy + title: Taxonomic classification + content: + - title_md: GTDB-Tk - Assign taxonomy to genome bins + description_md: | + Assign taxonomy to genome bins using the Genome Taxonomy Database (GTDB). This tool classifies bacterial and archaeal genomes based on phylogenetic placement and reference genome comparison. + + Output: Produces taxonomic classification tables and phylogenetic placement results (.tsv, .tree). 
+ inputs: + - label: Genome bins + datatypes: + - fasta + outputs: + - label: Taxonomic classification (TSV) + button_md: Run GTDB-Tk + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fgtdbtk_classify_wf%2Fgtdbtk_classify_wf" + + - id: functional_annotation + title: Functional annotation + content: + - title_md: Bakta - Functional annotation of genomes + description_md: | + Annotate assembled genomes and genome bins to identify genes, coding sequences, RNAs, and functional features. This tool helps interpret the biological functions and metabolic potential of microbial communities. + + Output: Produces annotated genome files and functional annotation reports (.gff3, .gbff, .faa, .tsv). + inputs: + - label: Genome bins or contigs (FASTA) + datatypes: + - fasta + outputs: + - label: Annotated genomes and functional features + button_md: Run Bakta + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbakta%2Fbakta%2F1.9.4%2Bgalaxy1&version=latest" + + - id: tutorials + title: Tutorials + content: + - title_md: Metagenomics analysis tutorials + description_md: Learn how to perform metagenomic analysis in Galaxy, including taxonomic classification, assembly, binning, and downstream functional analysis workflows. + button_md: Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/metagenomics/ \ No newline at end of file diff --git a/agrf/sections/microbial.yml b/agrf/sections/microbial.yml index 6a9d7d0..c364eea 100644 --- a/agrf/sections/microbial.yml +++ b/agrf/sections/microbial.yml @@ -1,5 +1,5 @@ id: microbial -title: Microbial profiling - PacBio full length 16S rRNA analysis +title: Microbial profiling tabs: - id: overview title: Overview @@ -7,15 +7,21 @@ tabs: content: - title_md: About the service description_md: | - PacBio HiFi full-length 16S data is quality filtered and denoised to high quality amplicon single variants (ASVs) using QIIME2 and DADA2. 
ASV classification is performed using two approaches. We perform a consensus alignment classification (using VSEARCH) against the Genome Taxonomy Database (GTDB r207). This approach should give high consistency. We also perform naΓ―ve Bayesian machine learning based classification (DADA2) using three databases that successively fall over to the next one if a species level match is not found. In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This should give better classification for low abundance ASVs + Microbial profiling is used to identify and compare microbial communities across samples. This can include long-read approaches, such as PacBio HiFi full-length 16S rRNA sequencing, as well as short-read amplicon sequencing approaches. + + Short-read amplicon sequencing is commonly used for targeted microbial community analysis and taxonomic profiling. Long-read approaches, such as PacBio HiFi full-length 16S sequencing, can provide improved taxonomic resolution by sequencing the full-length 16S rRNA gene. + + The tools and resources in this section support exploration of microbial community composition, diversity analysis, taxonomic classification, and downstream visualisation for both short-read and long-read microbial profiling datasets. + + For PacBio HiFi full-length 16S data, reads are quality filtered and denoised to high-quality amplicon sequence variants (ASVs) using QIIME2 and DADA2. ASV classification is performed using two approaches. We perform a consensus alignment classification using VSEARCH against the Genome Taxonomy Database (GTDB r207). This approach provides consistent taxonomic assignment. We also perform naΓ―ve Bayesian classification using three databases that successively fall back to the next database if a species-level match is not found. 
In order, they are the Genome Taxonomy Database (GTDB r207), the SILVA rRNA database (v138), and the NCBI RefSeq 16S rRNA database supplemented by the Ribosomal Database Project (RDP). This approach may improve classification sensitivity for low-abundance ASVs. ##### Results include **HiFi read data** - - Demultiplexed .FASTQ files (per sample). + - Demultiplexed FASTQ files (.fastq). - **A summary report (html):** + **A summary report (.html):** - Project summary QC statistics - Table of quality filtered read counts (per sample) @@ -30,16 +36,16 @@ tabs: **Number of Reads:** Typically greater than 50K reads per sample. - Read output varies depending on the composition and quality of the sample. Analyses provided by AGRF capps reads at 50K/sample. All reads captured during sequencing are available in the FASTQ files. Reviewing the rarefaction curves can provide insight to whether running an analysis with uncapped reads will provide more information or not. + Read output varies depending on sample composition and sequencing quality. Analyses provided by AGRF cap reads at 50K/sample. All reads captured during sequencing are available in the FASTQ files. Reviewing the rarefaction curves can provide insight into whether running an analysis with uncapped reads will provide more information or not. - title_md: What files are included? 
description_md: | - | Fileanme | Description | + | Filename | Description | | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **Raw Data** | | | Demultiplexed \*.FASTQ file (1 file per sample) | Contains sequencing reads for each sample | | **Sequencing Metrics** | | - | πŸ“ˆ samples.demux.summary.qzv | QIIME visualization summarizing.
a) Per-sample sequence counts (inc min, median, mean max, frequency histogram).
b) Quality (interactive plot of read length by quality score show seven-number quality summary plus sequence length summary). | + | πŸ“ˆ samples.demux.summary.qzv | QIIME visualisation summarizing.
a) Per-sample sequence counts (incl. min, median, mean, max, frequency histogram).
b) Quality (interactive plot of read length by quality score, showing a seven-number quality summary plus sequence length summary). | | 🖹 per-sample-fastq-counts.tsv | Per-sample sequence counts | | **Rarefaction** | | | 📈 alpha-rarefaction-curves.qzv | Rarefaction curve showing feature saturation for each sample. | @@ -66,39 +72,28 @@ tabs: | 🖹 unweighted_unifrac_distance_matrix.tsv
🗜️ unweighted_unifrac_distance_matrix.qza | UniFrac metrics are phylogenetically aware, treating closely related organisms as more similar. UniFrac metrics utilize a shared phylogenetic tree to calculate distance based on the fraction of unshared branch lengths. Unweighted UniFrac uses only the presence or absence of taxa. | | 🖹 weighted_unifrac_distance_matrix.tsv
πŸ—œοΈ weighted_unifrac_distance_matrix.qza | UniFrac metric additionally weights branch length with the abundance difference between the two samples. | | **PCA results and EMPeror plots** | | - | πŸ—œοΈ jaccard_pcoa_results.qza
πŸ“ˆ jaccard_emperor.qzv | PCA results and visualization of the Jaccard distance. | - | πŸ—œοΈ bray_curtis_pcoa_results.qza
πŸ“ˆ bray_curtis_emperor.qzv | PCA results and visualization of the Bray-Curtis distance. | - | πŸ—œοΈ unweighted_unifrac_pcoa_results.qza
πŸ“ˆ unweighted_unifrac_emperor.qzv | PCA results and visualization of the unweighted UniFrac distance. | - | πŸ—œοΈ `weighted_unifrac_pcoa_results.qza`
πŸ“ˆ weighted_unifrac_emperor.qzv | PCA results and visualization of the weighted UniFrac distance. | + | πŸ—œοΈ jaccard_pcoa_results.qza
πŸ“ˆ jaccard_emperor.qzv | PCA results and visualisation of the Jaccard distance. | + | πŸ—œοΈ bray_curtis_pcoa_results.qza
πŸ“ˆ bray_curtis_emperor.qzv | PCA results and visualisation of the Bray-Curtis distance. | + | πŸ—œοΈ unweighted_unifrac_pcoa_results.qza
πŸ“ˆ unweighted_unifrac_emperor.qzv | PCA results and visualisation of the unweighted UniFrac distance. | + | πŸ—œοΈ `weighted_unifrac_pcoa_results.qza`
πŸ“ˆ weighted_unifrac_emperor.qzv | PCA results and visualisation of the weighted UniFrac distance. | - title_md: File formats used description_md: | | Type | Description | | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------- | | `.qza`
QIIME Zipped Artifact | Zip file containing folders for data and data provenance. Can be opened with any zip tool or extracted with QIIME2 extract. | - | `.qzv`
QIIME Zipped Visualization | Zip file containing data and data provenance. Can be visualized with [QIIME 2 View](https://view.qiime2.org/). | - | `.biom`
Biological Observation Matrix | Can be imported into popular packages such as phyloseq for downstream processing and visualization. (https://biom-format.org/). | + | `.qzv`
QIIME Zipped Visualisation | Zip file containing data and data provenance. Can be visualised with [QIIME 2 View](https://view.qiime2.org/). | + | `.biom`
Biological Observation Matrix | Can be imported into popular packages such as phyloseq for downstream processing and visualisation. See https://biom-format.org/. | | `.tsv`
tab separated values | Table in text file format. Can be opened in any text editor or imported into Excel. | - - - id: workflows - title: Workflows - heading_md: - content: - - - title_md: 16S HiFi workflow - description_md: PacBio data - button_md: Request support - button_link: https://usegalaxy.org.au/workflows/run?id=c0a59c4ff4fcc845 - - id: tools title: Tools content: subsections: - - id: QIIME2_formats - title: Working with QIIME 2 files - content: + - id: QIIME2_formats + title: Working with QIIME 2 files + content: - title_md: Details description_md: | @@ -132,18 +127,33 @@ tabs: button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2_core__tools__export%2Fqiime2_core__tools__export" - - title_md: QIIME vizualisation extractor - Visualise .qzv files in Galaxy. + - title_md: qiime2 feature-table summarize - Summarize feature table description_md: | - Use this tool to visualisae .qzv files within Galaxy. + Generate a summary of a feature table, including sequence counts per sample, feature frequency distribution, and sequencing depth information. This tool helps assess dataset quality before downstream diversity and taxonomic analysis. + + Output: Interactive summary visualisations and sequencing depth statistics for each sample (.qzv). + inputs: + - label: QIIME 2 Artifact file - FeatureTable[Frequency] + datatypes: + - qza + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__summarize%2Fqiime2__feature_table__summarize" + + - title_md: QIIME visualisation extractor - Visualise .qzv files in Galaxy. + description_md: | + Extract and visualise the contents of QIIME 2 `.qzv` visualisation files directly within Galaxy. This tool allows users to inspect interactive visualisation outputs without downloading files locally. + + Output: Extracted visualisation contents displayed directly within Galaxy from QIIME visualisation files (.qzv). 
inputs: - label: QIIME 2 Visualisation file datatypes: - qzv button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fqiime_extract_viz%2Fqiime_extract_viz" - - title_md: QIMME 2 View - Visualise .qza or .qzv files using an online tool + - title_md: QIIME 2 View - Visualise .qza or .qzv files using an online tool description_md: | - Drag and drop QIIME 2 Artifact or Visualisation files from your computer. Best used with .qzv files. While this tool can load .qza files to extract information like citations, providence and metadata, it does not extract the data stored in these files. + Visualise QIIME 2 `.qza` and `.qzv` files using the online QIIME 2 View platform. This tool helps users explore interactive visualisations, metadata, provenance information, and analysis summaries. + + Output: Interactive online visualisations for QIIME 2 artifact and visualisation files (.qza, .qzv). inputs: - label: QIIME 2 Artifact or visualisation file datatypes: @@ -154,14 +164,28 @@ tabs: view_tip: View in QIIME2 - - id: alpha_diversity - title: Alpha Diversity + - title_md: qiime2 feature-table filter-samples - Filter samples using metadata + description_md: | + Filter samples from a feature table using metadata criteria to remove unwanted samples or outliers before downstream analysis. This tool is commonly used to retain specific sample groups for diversity and taxonomic analysis. + + Output: A filtered feature table containing only the selected samples (.qza). + inputs: + - label: QIIME 2 Artifact file - FeatureTable[Frequency] + datatypes: + - qza + - label: sample metadata + datatypes: + - tsv + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__filter_samples%2Fqiime2__feature_table__filter_samples" - content: + - id: alpha_diversity + title: Alpha Diversity - - title_md: Details - description_md: | - Alpha diversity measures the diversity *within* a single sample. 
There are a number of different metrics used. AGRF's analysis includes four alpha diversity metrics (stored within separate .qza files). + content: + + - title_md: Details + description_md: | + Alpha diversity measures the diversity *within* a single sample. There are a number of different metrics used. AGRF's analysis includes four alpha diversity metrics (stored within separate .qza files). - `observed_features_vector.qza` - Sample richness per sample. A count of the number of features (i.e. species) observed per sample. - `shannon_vector.qza` - Shannon entropy (i.e. Shannon index) for each sample. This is a quantitative measure of community richness (number of species present) and evenness. Specifically, it quantifies the uncertainty in predicting the species of an individual microbe (or effectively a read) taken at random from the sample. @@ -171,14 +195,14 @@ tabs: Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export` tool. The .tsv contains two columns: [sample name.fastq] , [alpha diversity metric] - inputs: + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - - title_md: qiime2 diversity alpha - Calculate alpha diversity (non-phylogenetic) - description_md: | + - title_md: qiime2 diversity alpha - Calculate alpha diversity (non-phylogenetic) + description_md: | Non-phylogenetic alpha diversity metrics provide a general overview of diversity based on counts or proportions. Common examples (included in AGRF's analysis) are: * Observed features (richness) @@ -187,12 +211,13 @@ tabs: Use this tool to calculate other non-phylogenetic alpha diversity metrics. - inputs: + Output: Alpha diversity values for each sample stored as QIIME 2 artifacts (.qza). 
+ inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - buttons: + buttons: - icon: run link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" tip: QIIME2 - Alpha diversity @@ -203,30 +228,32 @@ tabs: link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" tip: QIIME2 - Alpha diversity (phylogenetic) - #button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" - - title_md: qiime2 diversity alpha-phylogenetic - Calculate alpha diversity (with a phylogenetic tree) - description_md: | - Phylogenetic alpha diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included om AGRF's analysis) is: + - title_md: qiime2 diversity alpha-phylogenetic - Calculate alpha diversity (with a phylogenetic tree) + description_md: | + Phylogenetic alpha diversity metrics are useful when evolutionary distinctiveness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included in AGRF's analysis) is: * Faith's Phylogenetic Distance Use this tool to calculate other phylogenetic alpha diversity metrics. + + Output: Phylogenetic diversity values for each sample stored as QIIME 2 artifacts (.qza). 
- inputs: + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" - button_link: /request/vcs - - - title_md: qiime2 diversity alpha-correlation - Correlate alpha diversity with sample metadata - description_md: | - Determine whether numeric sample metadata columns are correlated with alpha diversity. - - inputs: + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" + + - title_md: qiime2 diversity alpha-correlation - Correlate alpha diversity with sample metadata + description_md: | + Determine whether numeric sample metadata variables are correlated with alpha diversity values across samples. This analysis helps identify relationships between microbial diversity and experimental or environmental metadata variables. + + Output: Statistical correlation results and visualisations showing relationships between alpha diversity metrics and metadata variables (.qzv). + inputs: - label: QIIME 2 Artifact file - Alpha Diversity datatypes: - qza @@ -235,13 +262,15 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" - - title_md: qiime2 diversity alpha-group-significance - Correlate alpha diversity with groups in sample metadata - description_md: | - Visually and statistically compare groups of alpha diversity values. 
+ - title_md: qiime2 diversity alpha-group-significance - Correlate alpha diversity with groups in sample metadata + description_md: | + Statistically compare alpha diversity values between groups defined in the sample metadata. This tool helps determine whether microbial diversity differs significantly between experimental groups or sample categories. + + Output: Boxplots and statistical significance results comparing alpha diversity between groups (.qzv). - inputs: + inputs: - label: QIIME 2 Artifact file - Alpha Diversity datatypes: - qza @@ -250,37 +279,39 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" - - title_md: qiime2 diversity alpha-rarefaction - Assess sequencing depth sufficiency - description_md: | + - title_md: qiime2 diversity alpha-rarefaction - Assess sequencing depth sufficiency + description_md: | QIIME 2 repeatedly subsamples (rarefies) each sample’s sequence data at different depths (e.g., 1000, 2000, 3000 reads, etc.). For each depth, it calculates an alpha diversity metric (e.g., Shannon index, Faith's PD). It does this multiple times per depth to account for random variation (controlled by the --p-iterations parameter). The result is a curve for each sample showing diversity vs. sampling effort. - inputs: + Output: Rarefaction curves showing diversity trends across sequencing depths for each sample (.qzv). 
+ + inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" - - id: beta_diversity - title: Beta Diversity + - id: beta_diversity + title: Beta Diversity - content: + content: - title_md: Details description_md: | - Beta diversity measures the diversity *between* samples. There are a number of different metrics used. AGRF's analysis includes four beta diversity metrics (stored within separate .qza files). + Beta diversity measures the diversity *between* samples. There are a number of different metrics used. AGRF's analysis includes four beta diversity metrics (stored within separate .qza files). - `observed_features_vector.qza` - Sample richness per sample. A count of the number of features (i.e. species) observed per sample. - `shannon_vector.qza` - Shannon entropy (i.e. Shannon index) for each sample. This is a quantitative measure of community richness (number of species present) and evenness. Specifically, it quantifies the uncertainty in predicting the species of an individual microbe (or effectively a read) taken at random from the sample. - `evenness_vector.qza` - Pielous evenness index for each sample. A measure of how close in numbers (sequence counts) each species in a sample is. It is the ratio of the Shannon index to the maximum possible Shannon index if every species was equally likely. Value between 0 and 1. The closer to 1 the more even. - `faith_pd_vector.qza` - Faiths phylogenetic distance. A phylogenetically aware alpha diversity metric. Equal to the sum of all branch lengths of the phylogenetic tree that spans all members of the sample. The higher the number the greater the diversity. 
- Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export' tool. The .tsv contains two columns: `[sample name.fastq]` , `[alpha diversity metric]` + Each .qza file contains alpha-diversity.tsv which can be extracted in Galaxy using the `qiime tools export` tool. The .tsv contains two columns: `[sample name.fastq]` , `[alpha diversity metric]` inputs: @@ -291,42 +322,38 @@ tabs: - title_md: qiime2 diversity-beta - Calculate beta diversity (non-phylogenetic) description_md: | - Non-phylogenetic beta diversity metrics provide a general overview of diversity based on counts or proportions. Common examples (included in AGRF's analysis) are: - - * Observed features (richness) - * Shannon Index (richness + evenness) - * Pielou's Evenness (how equally distributed species are) - - Use this tool to calculate other non-phylogenetic beta diversity metrics. + Calculate beta diversity metrics to measure differences in microbial community composition between samples. These metrics help identify similarities or differences between microbial communities across samples or experimental groups. + + Output: Beta diversity distance matrices for downstream ordination and clustering analysis (.qza). inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha%2Fqiime2__diversity__alpha" + + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta%2Fqiime2__diversity__beta" - title_md: qiime2 diversity beta-phylogenetic - Calculate beta diversity (with a phylogenetic tree) description_md: | - Phylogenetic beta diversity metrics are useful when evolutionary distinctivness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). 
A common example (included om AGRF's analysis) is: + Phylogenetic beta diversity metrics are useful when evolutionary distinctiveness is relevant to your hypothesis (e.g., comparing ecosystems or communities with potentially different evolutionary histories). A common example (included in AGRF's analysis) is: - * Faith's Phylogenetic Distance + * Faith's Phylogenetic Distance - Use this tool to calculate other phylogenetic beta diversity metrics. + Use this tool to calculate other phylogenetic beta diversity metrics. inputs: - label: QIIME 2 Artifact file - FeatureTable[Frequency] datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_phylogenetic%2Fqiime2__diversity__alpha_phylogenetic" - button_link: /request/vcs - + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_phylogenetic%2Fqiime2__diversity__beta_phylogenetic" + - title_md: qiime2 diversity beta-correlation - Correlate beta diversity with sample metadata description_md: | - Determine whether numeric sample metadata columns are correlated with beta diversity. - + Determine whether numeric sample metadata variables are correlated with beta diversity differences between samples. This analysis helps identify associations between microbial community composition and metadata variables. + + Output: Statistical correlation results describing relationships between beta diversity distances and metadata variables (.qzv). 
inputs: - label: QIIME 2 Artifact file - Beta Diversity datatypes: @@ -335,14 +362,14 @@ tabs: datatypes: - qza - tsv - - - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_correlation%2Fqiime2__diversity__alpha_correlation" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_correlation%2Fqiime2__diversity__beta_correlation" - title_md: qiime2 diversity beta-group-significance - Correlate beta diversity with groups in sample metadata description_md: | - Visually and statistically compare groups of beta diversity values. + Statistically compare microbial community composition between groups defined in the sample metadata. This analysis is commonly performed using methods such as PERMANOVA to evaluate whether groups differ significantly in community structure. + + Output: Statistical significance results and visualisations comparing beta diversity between groups (.qzv). inputs: - label: QIIME 2 Artifact file - Beta Diversity @@ -353,8 +380,21 @@ tabs: - qza - tsv - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_group_significance%2Fqiime2__diversity__alpha_group_significance" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_group_significance%2Fqiime2__diversity__beta_group_significance" + + - title_md: qiime2 diversity pcoa - Principal coordinates analysis + description_md: | + Perform principal coordinates analysis (PCoA) on a beta diversity distance matrix to visualise the relationships between samples in a reduced dimensional space. This tool helps identify clustering patterns, sample similarity, and potential separation between experimental groups. + + Output: Principal coordinate values and ordination results for sample clustering visualisation (.qza). 
+ + inputs: + - label: QIIME 2 Artifact file - Distance Matrix + datatypes: + - qza + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__pcoa%2Fqiime2__diversity__pcoa" + - title_md: qiime2 diversity beta-rarefaction - Assess sequencing depth sufficiency description_md: | QIIME 2 repeatedly subsamples (rarefies) each sample’s sequence data at different depths (e.g., 1000, 2000, 3000 reads, etc.). @@ -367,12 +407,101 @@ tabs: datatypes: - qza - button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__alpha_rarefaction%2Fqiime2__diversity__alpha_rarefaction" + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__diversity__beta_rarefaction%2Fqiime2__diversity__beta_rarefaction" + + - title_md: qiime2 composition ancom - Differential abundance of taxa + description_md: | + Identify taxa that are differentially abundant between sample groups using ANCOM. This tool helps detect microbial taxa that show significant abundance differences across experimental conditions or metadata categories. + + Output: Statistical results and visualisations identifying differentially abundant taxa between groups (.qzv). + inputs: + - label: Feature Table + datatypes: + - qza + - label: sample metadata + datatypes: + - tsv + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__composition__ancom%2Fqiime2__composition__ancom" + + + - title_md: PICRUSt2 metagenome prediction - Predict microbial functions + description_md: | + Predict microbial functional profiles from 16S rRNA sequencing data. This tool estimates potential gene families and metabolic pathways based on the observed microbial community composition. + + Output: Predicted functional pathway and gene family abundance tables for downstream analysis (.tsv, .qza). 
+ inputs: + - label: Sequence abundance table (OTUs or ASVs) + datatypes: + - biom + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fpicrust2_metagenome_pipeline%2Fpicrust2_metagenome_pipeline" + - title_md: Krona - Interactive taxonomic visualisation + description_md: | + Generate interactive hierarchical visualisations of microbial taxonomy and abundance data. This tool allows users to explore microbial composition across multiple taxonomic levels. + + Output: Interactive zoomable taxonomic visualisations for microbial community exploration (.html). + inputs: + - label: Taxonomy classification table + datatypes: + - tsv + - biom + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fsaskia-hiltemann%2Fkrona_text%2Fkrona-text" + - title_md: qiime2 feature-table heatmap - Visualise feature abundance as a heatmap + description_md: | + Visualise feature abundance patterns across samples using a heatmap. This tool helps identify highly abundant features and differences in feature distribution between samples or groups. + + Output: Interactive heatmaps showing feature abundance across samples (.qzv). + inputs: + - label: QIIME 2 Artifact file - FeatureTable[Frequency] + datatypes: + - qza + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__feature_table__heatmap%2Fqiime2__feature_table__heatmap" + + - title_md: qiime2 taxa barplot - Visualise taxonomic composition + description_md: | + Visualise microbial taxonomic composition and relative abundance across samples. This tool helps compare community structure at different taxonomic levels between samples or groups. + + Output: Interactive stacked bar plots showing taxonomic abundance across samples (.qzv). 
+ inputs: + - label: Feature table with taxonomy + datatypes: + - qza + - label: Taxonomy assignments + datatypes: + - qza + - label: sample metadata + datatypes: + - tsv + button_link: "{{ galaxy_base_url }}?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fq2d2%2Fqiime2__taxa__barplot%2Fqiime2__taxa__barplot" + + - id: species_identification + title: Species identification and validation + content: + - title_md: BLAST - Identify sequences using database search + description_md: | + Compare query sequences against reference databases to identify similar sequences and possible species matches. This tool is useful for validating species identity or checking sequence similarity against known references. + + Output: Alignment results showing matched reference sequences, similarity scores, and sequence identity (.txt, .tsv). + button_md: Run BLAST + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/q2d2/qiime2__feature_classifier__classify_consensus_blast/qiime2__feature_classifier__classify_consensus_blast" + - title_md: MAFFT - Multiple sequence alignment + description_md: | + Align multiple nucleotide sequences with reference sequences for comparison and phylogenetic analysis. This tool helps identify conserved and variable regions across sequences. + + Output: Aligned sequence files for downstream phylogenetic analysis (.fasta). + button_md: Run MAFFT + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/q2d2/qiime2__alignment__mafft/qiime2__alignment__mafft" + - title_md: FastTree - Build phylogenetic tree + description_md: | + Build approximately maximum-likelihood phylogenetic trees from aligned sequence data. This tool helps compare evolutionary relationships between sequences or expected species. + + Output: Phylogenetic tree files for visualising sequence relatedness (.nwk). 
+ button_md: Run FastTree + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu/repos/iuc/fasttree/fasttree" - id: tutorials title: Tutorials @@ -380,7 +509,7 @@ tabs: content: - title_md: Calculating α and β diversity from microbiome taxonomic data - description_md: tool + description_md: Learn how to analyse microbiome data in Galaxy, including calculating alpha and beta diversity, exploring taxonomic composition, and visualising microbial community differences. button_md: Tutorials button_link: https://training.galaxyproject.org/training-material//topics/microbiome/tutorials/diversity/tutorial.html @@ -400,3 +529,4 @@ tabs: # description_md: Contact AGRF for more help with your data. # button_md: Contact AGRF # button_link: /request/support + diff --git a/agrf/sections/moreanalysis.yml b/agrf/sections/moreanalysis.yml index c3925e3..32de14d 100644 --- a/agrf/sections/moreanalysis.yml +++ b/agrf/sections/moreanalysis.yml @@ -56,3 +56,43 @@ tabs: description_md: This tool does xyz - title_md: WF1 description_md: This wf does xyz + + - id: gbs + title: GBS + heading_md: > + content: + - title_md: ustacks + description_md: > + Build loci for each sample from sequencing reads. + button_md: Run ustacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_ustacks%2Fstacks_ustacks" + + - title_md: cstacks + description_md: > + Create a catalog of loci from multiple samples. + button_md: Run cstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks_cstacks%2Fstacks_cstacks" + . + - title_md: sstacks + description_md: > + Match individual samples to the catalog of loci. + button_md: Run sstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks" + + - title_md: tsv2bam + description_md: > + Convert stacks output into BAM format for downstream analysis.
+ button_md: Run tsv2bam + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_tsv2bam%2Fstacks2_tsv2bam" + + - title_md: gstacks + description_md: > + Assemble loci and call SNPs across all samples. + button_md: Run gstacks + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks" + + - title_md: populations + description_md: > + Calculate population-level statistics and export variant data. + button_md: Run populations + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations" \ No newline at end of file diff --git a/agrf/sections/qualitycontrol.yml b/agrf/sections/qualitycontrol.yml deleted file mode 100644 index 66f69c3..0000000 --- a/agrf/sections/qualitycontrol.yml +++ /dev/null @@ -1,102 +0,0 @@ -id: qc -title: Quality control -tabs: - - id: help - title: Overview - heading_md: > - content: - - title_md: What is quality control? - description_md: | - Assess the quality and characteristics of your raw sequencing data, and perform data cleaning prior to analysis. - button_md: Quality Control Tutorial - button_link: https://training.galaxyproject.org/training-material/topics/sequence-analysis/tutorials/quality-control/tutorial.html - - title_md: What tools and workflows can I use for quality control? - description_md: | - * Galaxy has a range of tools available to perform quality control. - - * Select your datatype in the tabs above to see relevant tools and workflows. - - title_md: Converting data to different formats - description_md: | - Some tools require data in different formats. - Learn about converting between bam, fastq and fastqsanger formats, and compressing/uncompressing files. - button_md: Tutorial - button_link: add link - - - id: Illumina - title: Illumina data - heading_md: - content: - - title_md: FastQC - produce sequence quality reports - description_md: > -

- Generate a range of reports about raw data quality and characteristics. -

- inputs: - - datatypes: - - fastq - - fastq.gz - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" - - title_md: FastP - produce sequence quality reports, trimming & filtering - description_md: > -

- This tool is faster than FastQC, and can also trim reads and filter by quality. -

- inputs: - - datatypes: - - fastqsanger - - fastqsanger.gz - button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastp%2Ffastp" - - title_md: QC of Illumina data - workflow - description_md: > -

- QC of paired-end Illumina data.

Tools: FastQC MultiQC -

- button_link: https://genome.usegalaxy.org.au/u/anna/w/qc-illumina-data - button_tip: Run workflow - - - id: Pacbio - title: Pacbio data - heading_md: - content: - - title_md: Convert BAM files to Fastq - workflow - description_md: > -

- Run this workflow first if your raw data is in css.bam format.

Tools: SamToFastq Samtools flagstat -

- button_link: link - button_tip: Run workflow - - - title_md: Remove Hifi adapters - workflow - description_md: > -

- Filter for adapter contamination.

Tools: HiFiAdapterFilt -

- button_link: add link - button_tip: Run workflow - - # - id: Nanopore - # title: Nanopore data - # heading_md: - # content: - # - title_md: NanoPlot - visualize long read data - # description_md: > - #

- # A plotting suite for long read sequencing data and alignments. - #

- # inputs: - # - datatypes: - # - fastq - # - fastq.gz - # button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fnanoplot%2Fnanoplot" - # button_tip: Run tool - - # - title_md: NanoFilt - filter and trim long read data - # description_md: > - #

- # Filter reads based on quality, length, or GC content. - #

- # inputs: - # - datatypes: - # - fastqsanger - # button_link: add tool - diff --git a/agrf/sections/rnaseq.yml b/agrf/sections/rnaseq.yml new file mode 100644 index 0000000..55fa8f2 --- /dev/null +++ b/agrf/sections/rnaseq.yml @@ -0,0 +1,317 @@ +id: rnaseq +title: RNA-seq + +tabs: + - id: overview + title: Overview + content: + + - title_md: About the service + description_md: | + Illumina RNA-seq is used to quantify gene expression by sequencing transcriptomic RNA from biological samples. + + Sequencing is performed using paired-end reads (150 bp) on an Illumina NovaSeq platform. Raw sequencing data undergoes primary processing including demultiplexing and quality control to generate FASTQ files. + + The data is processed through a standard RNA-seq analysis workflow, which includes trimming, alignment to a reference genome, transcript assembly, and gene-level quantification. Reads are aligned using the STAR aligner, and gene counts are summarised using featureCounts. + + Optional downstream analysis includes differential gene expression using edgeR, enabling identification of significantly differentially expressed genes between experimental conditions. + + The final outputs provide gene expression measurements, alignment data, transcript structures, and statistical analysis results. + + - title_md: Sequencing Output + description_md: | + **Read length:** 150 bp paired-end reads + + **Number of reads:** Varies per sample (typically millions of reads per sample) + + Raw sequencing reads are provided as FASTQ files containing sequence and quality score information. + + - title_md: What files are included? 
+ description_md: | + + | Filename | Description | + |----------|------------| + | Demultiplexed FASTQ files (.fastq.gz) | Raw sequencing reads for each sample | + | *.bam | Aligned reads mapped to the reference genome | + | *.bam.bai | BAM index files for visualisation | + | features_summary.csv | Summary of mapping statistics with genome and exons | + | gene_counts.txt | Gene-level read count matrix | + | *.gtf | Transcript assembly (gene and isoform structure) | + | MDSplot.pdf | Sample clustering visualisation | + | Differential expression analysis results | MDS plots, box plots, library size plots, DE tables, smear plots, and heatmaps from edgeR analysis | + - title_md: File formats used + description_md: | + + | Type | Description | + |------|------------| + | .fastq / .fastq.gz | Raw sequencing reads | + | .bam | Binary alignment files storing mapped reads | + | .bai | Index files for BAM visualisation | + | .gtf | Transcript annotation and assembly | + | .txt / .csv | Gene counts and summary statistics | + | .pdf | Visualisation outputs | + + - id: tools + title: Tools + content: + subsections: + + - id: qc + title: Quality control + content: + - title_md: FastQC - Assess sequencing read quality + description_md: | + Assess the quality of raw RNA-seq reads before downstream analysis. This tool checks base quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. + + Output: Produces HTML quality reports and summary statistics for each FASTQ file (.html, .zip). + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastq + outputs: + - label: FastQC report (HTML) + button_md: Run FastQC + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" + + - title_md: MultiQC - Aggregate QC reports + description_md: | + Combine quality control reports from multiple samples into one summary report. 
This tool is useful for comparing FastQC, trimming, alignment, and other QC results across all samples in a project. + + Output: Produces a single interactive summary report across samples (.html). + inputs: + - label: FastQC reports + datatypes: + - html + - zip + - label: FastP reports + datatypes: + - json + - label: Cutadapt / Trim Galore logs + datatypes: + - txt + outputs: + - label: Summary QC report (HTML) + button_md: Run MultiQC + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmultiqc%2Fmultiqc" + + - title_md: Trim Galore - Trim adapters and low-quality bases + description_md: | + Remove adapter sequences and low-quality bases from RNA-seq reads before alignment. This step improves read quality and helps reduce alignment errors caused by technical sequence contamination. + + Output: Produces trimmed sequencing reads for downstream analysis (.fastq.gz). + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastqsanger + outputs: + - label: Trimmed reads (FASTQ) + button_md: Run Trim Galore + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbgruening%2Ftrim_galore%2Ftrim_galore" + + - title_md: sortmerna - Remove rRNA contamination + description_md: | + Identify and remove ribosomal RNA reads from RNA-seq data. This step helps reduce unwanted rRNA contamination so that downstream analysis focuses on informative transcriptomic reads. + + Output: Produces filtered sequencing reads with reduced rRNA content (.fastq.gz). + inputs: + - label: Sequencing reads (FASTQ) + datatypes: + - fastq + outputs: + - label: Filtered reads (FASTQ) + button_md: Run sortmerna + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Frnateam%2Fsortmerna%2Fbg_sortmerna" + + - id: alignment + title: Alignment + content: + - title_md: STAR - Align reads to reference genome + description_md: | + Align RNA-seq reads to a reference genome using the STAR aligner. 
This step maps sequencing reads to genomic locations and prepares the data for downstream quantification and transcript analysis. + + Output: Produces aligned read files and alignment statistics (.bam, .sam, .tab). + + inputs: + - label: RNA-seq reads (FASTQ) + datatypes: + - fastq + - label: Reference genome (fasta) + datatypes: + - fasta + - label: Gene annotation file (GTF) + datatypes: + - gtf + outputs: + - label: Aligned reads (BAM) + button_md: Run STAR + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Frgrnastar%2Frna_star" + + - id: post_alignment + title: BAM processing + content: + - title_md: samtools sort - Sort BAM files + description_md: | + Sort aligned BAM files by genomic coordinates. Sorting is required for many downstream tools including indexing, visualisation, and read quantification workflows. + + Output: Produces sorted alignment files suitable for downstream analysis (.bam). + + inputs: + - label: Aligned reads (BAM) + datatypes: + - bam + outputs: + - label: Sorted BAM + button_md: Run samtools sort + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Fsamtools_sort%2Fsamtools_sort" + + - id: quantification + title: Gene quantification + content: + - title_md: StringTie - Assemble and quantify transcripts + description_md: | + Assemble transcripts and estimate transcript abundance from aligned RNA-seq reads. This tool can reconstruct transcript structures and support gene or isoform-level expression analysis. + + Output: Produces transcript assembly and expression estimate files (.gtf, .tab). 
+ inputs: + - label: Aligned reads (BAM) + datatypes: + - bam + - label: Reference annotation (GTF) + datatypes: + - gtf + outputs: + - label: Transcript assembly (GTF) + button_md: Run StringTie + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstringtie%2Fstringtie" + + - title_md: featureCounts - Count reads per gene + description_md: | + Count the number of reads assigned to genes using aligned BAM files and a gene annotation file. This step converts mapped reads into a gene-level count matrix for statistical analysis. + + Output: Produces gene count tables for downstream filtering, normalisation, and differential expression analysis (.txt, .tsv). + + inputs: + - label: Aligned reads (BAM) + datatypes: + - bam + - label: Gene annotation file + datatypes: + - gtf + - gff + outputs: + - label: Gene count matrix (TSV) + button_md: Run featureCounts + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffeaturecounts%2Ffeaturecounts" + + - id: filtering + title: Filtering + content: + - title_md: Filter samples and lowly expressed genes + description_md: | + Filter count matrices by selecting relevant samples and removing lowly expressed genes before differential expression analysis. + + Output: Produces filtered count matrices suitable for downstream statistical analysis (.tsv, .txt). + inputs: + - label: Gene count matrix + datatypes: + - tsv + - label: Sample metadata + datatypes: + - tsv + outputs: + - label: Filtered gene count matrix + datatypes: + - tsv + button_md: Run Filter + button_link: "{{ galaxy_base_url }}/?tool_id=Filter1" + + - id: analysis + title: Differential expression + content: + - title_md: edgeR - Differential expression analysis + description_md: | + Perform differential expression analysis to identify genes with significant expression differences between sample groups. edgeR is commonly used for RNA-seq count data and models biological variation between replicates. 
+ + Output: Produces differential expression result tables containing log fold change, p-values, and adjusted significance values (.tsv, .txt). + inputs: + - label: Gene count matrix + datatypes: + - tsv + - label: Sample metadata + datatypes: + - tsv + outputs: + - label: Differential expression results (TSV) + button_md: Run edgeR + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fedger%2Fedger" + + - title_md: DESeq2 - Differential gene expression analysis + description_md: | + Identify differentially expressed genes between sample groups using RNA-seq count data. DESeq2 includes normalisation and statistical testing steps to compare gene expression across experimental conditions. + + Output: Produces differential expression result tables with log fold change, p-values, and adjusted significance values (.tsv, .txt). + inputs: + - label: Gene count matrix + datatypes: + - tsv + - label: Sample metadata + datatypes: + - tsv + outputs: + - label: Differential expression results (TSV) + button_md: Run DESeq2 + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fdeseq2%2Fdeseq2" + + - id: visualisation + title: Visualisation + content: + - title_md: PCA - Visualise sample clustering + description_md: | + Explore sample clustering using principal component analysis based on expression values. PCA helps identify major sources of variation, outliers, and whether samples group according to biological or experimental conditions. + + Output: Produces PCA plots showing sample relationships and clustering patterns (.png, .pdf, .html). 
+ inputs: + - label: Normalised expression matrix + datatypes: + - tsv + outputs: + - label: PCA plot + button_md: Run PCA + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fggplot2_pca%2Fggplot2_pca" + + - title_md: Heatmap - Visualise gene expression patterns + description_md: | + Visualise gene expression patterns across samples using a heatmap. Heatmaps help identify clusters of genes or samples with similar expression profiles. + + Output: Produces heatmap visualisations showing expression patterns across selected genes and samples (.png, .pdf, .html). + inputs: + - label: Expression matrix + datatypes: + - tsv + outputs: + - label: Heatmap visualisation + button_md: Run Heatmap + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fggplot2_heatmap2%2Fggplot2_heatmap2" + + - title_md: Volcano plot - Visualise differential expression results + description_md: | + Visualise differential expression results by plotting log fold change against statistical significance. Volcano plots help identify genes with large expression changes and strong statistical support. + + Output: Produces volcano plot visualisations highlighting significantly upregulated and downregulated genes (.png, .pdf, .html). + inputs: + - label: Differential expression results (TSV) + datatypes: + - tsv + outputs: + - label: Volcano plot visualisation + button_md: Run Volcano plot + button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fvolcanoplot%2Fvolcanoplot" + + - id: tutorials + title: Tutorials + heading_md: + content: + - title_md: RNA-seq analysis workflow tutorial + description_md: Learn how to perform RNA-seq analysis in Galaxy, including read preprocessing, alignment, transcript assembly, and differential gene expression analysis. 
+ button_md: Tutorials + button_link: https://training.galaxyproject.org/training-material/topics/transcriptomics/ \ No newline at end of file diff --git a/agrf/sections/sequencing_qc.yml b/agrf/sections/sequencing_qc.yml new file mode 100644 index 0000000..328146a --- /dev/null +++ b/agrf/sections/sequencing_qc.yml @@ -0,0 +1,124 @@ +id: qc +title: Sequencing Quality Control + +tabs: + - id: help + title: Overview + heading_md: + content: + - title_md: What is sequencing quality control? + description_md: | + Sequencing quality control helps assess raw sequencing data before analysis. It can identify issues such as low-quality reads, adapter contamination, unusual sequence composition, or format problems. + + This step helps decide whether trimming, filtering, format conversion, or other cleaning steps are needed before downstream analysis. + button_md: Quality Control Tutorial + button_link: https://training.galaxyproject.org/training-material/topics/sequence-analysis/tutorials/quality-control/tutorial.html + + - title_md: What tools can I use for sequencing quality control? + description_md: | + * Galaxy provides tools for checking read quality, trimming adapters, filtering low-quality reads, and summarising QC results. + * Use the Illumina data tab for short-read sequencing quality control tools. + * Use the PacBio data tab for long-read sequencing quality control tools. + + - title_md: Converting data to different formats + description_md: | + Some tools require data in specific formats. You may need to convert between BAM, FASTQ, and FASTQ-Sanger formats, or compress and uncompress files before analysis. 
+ button_md: Tutorial + button_link: https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/upload-rules/tutorial.html + + - id: Illumina + title: Illumina data + heading_md: + content: + - title_md: FastQC - assess sequencing quality + description_md: | + Assess the quality of raw Illumina sequencing reads before downstream analysis. FastQC reports quality scores, GC content, sequence length distribution, duplication levels, and possible adapter contamination. + + Output: Produces sequence quality reports and summary statistics for each sample (.html, .zip). + inputs: + - label: Sequencing reads + datatypes: + - fastq + - fastq.gz + - fastqsanger + - fastqsanger.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" + + - title_md: FastP - trim and filter sequencing reads + description_md: | + Perform read quality control, adapter trimming, and filtering for Illumina sequencing reads. FastP can remove low-quality bases, trim adapters, filter poor-quality reads, and generate quality reports. + + Output: Produces cleaned FASTQ reads and quality control reports (.fastq.gz, .html, .json). + inputs: + - label: Raw Illumina sequencing reads + datatypes: + - fastqsanger + - fastqsanger.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Ffastp%2Ffastp" + + - title_md: Cutadapt - trim adapters and low-quality bases + description_md: | + Remove adapter sequences and low-quality regions from sequencing reads before downstream analysis. This improves read quality and reduces technical sequence contamination. + + Output: Produces trimmed FASTQ reads and trimming reports (.fastq.gz, .txt). 
+ inputs: + - label: Illumina FASTQ reads + datatypes: + - fastqsanger + - fastqsanger.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Flparsons%2Fcutadapt%2Fcutadapt%2F5.2+galaxy0&version=5.2+galaxy0" + + - title_md: MultiQC - summarise quality control reports + description_md: | + Combine quality control results from multiple samples and tools into one summary report. MultiQC is useful for reviewing FastQC, FastP, Cutadapt, and other QC outputs across a whole project. + + Output: Produces an interactive combined quality control summary report (.html). + inputs: + - label: FastQC reports + datatypes: + - html + - zip + + - label: FastP reports + datatypes: + - json + + - label: Cutadapt / Trim Galore logs + datatypes: + - txt + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fmultiqc%2Fmultiqc" + + - id: Pacbio + title: PacBio data + heading_md: + content: + - title_md: PacBio quality control + description_md: | + PacBio HiFi sequencing data is typically assessed using PacBio SMRT Link software, which provides sequencing run metrics, read quality summaries, and quality control reports. + + SMRT Link is not currently available within Galaxy. The tools listed below provide basic quality control functionality for PacBio long-read data within Galaxy. + + - title_md: FastQC - basic sequence quality assessment + description_md: | + FastQC can be used to assess basic sequence quality metrics for PacBio FASTQ files, including read quality scores, sequence length distributions, and sequence composition. + + Output: Produces sequence quality reports and summary statistics (.html, .zip).
+ inputs: + - label: PacBio FASTQ reads + datatypes: + - fastq + - fastq.gz + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Ffastqc%2Ffastqc" + + - title_md: NanoPlot - visualise long-read sequencing quality + description_md: | + Assess quality metrics for PacBio and Nanopore long-read sequencing datasets. NanoPlot visualises read length distributions, quality scores, sequencing yield, and other long-read summary statistics. + + Output: Produces sequencing quality plots and summary reports (.html, .png, .txt). + inputs: + - label: Long-read sequencing data + datatypes: + - fastq + - fastq.gz + - bam + button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fnanoplot%2Fnanoplot" \ No newline at end of file diff --git a/agrf/static/local/base.yml b/agrf/static/local/base.yml new file mode 100644 index 0000000..5193a80 --- /dev/null +++ b/agrf/static/local/base.yml @@ -0,0 +1,37 @@ +# Default spec for a Lab page - this is a test +# Any variables defined here can be used in Md/HTML templates, like: +# "# Welcome to the Agrf" + +site_name: "Australia" +lab_name: AGRF Lab #this will be in caps, at right of logo +#or: use the word lab, in same font as agrf logo +galaxy_base_url: https://usegalaxy.org.au +subdomain: agrf +root_domain: usegalaxy.org.au + +# The files below must be accessible on the internet, relative to this file +# ----------------------------------------------------------------------------- +# Custom content relative to this file URL +header_logo: static/agrf-lab.png #logo at the left hand side +custom_css: static/custom.css +intro_md: templates/intro.md #intro text +footer_md: templates/footer.md +conclusion_md: templates/conclusion.md + +# Data to be rendered into sections/tabs/accordion elements: + +#the sections are the yml files + +sections: + - sections/data.yml + - sections/sequencing_qc.yml + - sections/microbial.yml + - sections/rnaseq.yml + - 
sections/metagenomics.yml + - sections/genomeassembly.yml + - sections/gbs.yml + - sections/learn.yml + - sections/help_contact.yml +# - sections/moreanalysis.yml + +# diff --git a/agrf/templates/conclusion.md b/agrf/templates/conclusion.md index 966ead6..e6e650a 100644 --- a/agrf/templates/conclusion.md +++ b/agrf/templates/conclusion.md @@ -1,69 +1,5 @@ -Thanks for checking out the Galaxy {{ site_name }} {{ lab_name }}! +Thanks for checking out the Galaxy {{ site_name }} {{ lab_name }}! +AGRF is the [Australian Genome Research Facility](https://www.agrf.org.au/). -

- AGRF is the Australian Genome Research Facility. -
- - Order more sequencing from AGRF - -

- - - [Contact Galaxy Australia](https://site.usegalaxy.org.au/request). - - [Connect with the Australian genomics community](https://www.biocommons.org.au/genomics-domain). - - -
-
- Logo 1 -
-
- Logo 2 -
-
- Logo 3 -
-
- +For support, training resources, or contact information, please use the **Help and Contact** section above. \ No newline at end of file diff --git a/agrf/templates/intro.md b/agrf/templates/intro.md index 38dd504..98dea80 100644 --- a/agrf/templates/intro.md +++ b/agrf/templates/intro.md @@ -1,5 +1,9 @@ Welcome to the Galaxy {{ site_name }} {{ lab_name }}! +
+ Important: This Galaxy Lab provides self-serve links to Galaxy tools and training resources. AGRF does not provide support for running or troubleshooting these tools directly. For Galaxy platform or tool issues, please contact Galaxy Australia support. For complex analyses or custom bioinformatics projects, please contact AGRF to discuss service options. +
+ @@ -63,25 +69,34 @@ then add Section, so data becomes dataSection
Import Data
-
Quality Control
+
Sequencing Quality Control
Microbial Profiling
-
-
Learn Galaxy
+
RNASeq
+
+
+
Metagenomics
-
Galaxy Help
+
Genome Assembly
-
Contact AGRF
+
GBS
+ +
+
Learn Galaxy
+
+
+
Help and Contact
+
+ -