AGRF · tojoa10 · Feb 27, 2026 · Feb 27, 2026 · Mar 6, 2026 · Mar 13, 2026
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+python-3.14.3-amd64 (1).exe
+venv
+venv311
+.DS_Store
diff --git a/agrf/CONTRIBUTORS b/agrf/CONTRIBUTORS
diff --git a/agrf/base.yml b/agrf/base.yml
@@ -5,7 +5,7 @@
 site_name: "Australia"
 lab_name: AGRF Lab #this will be in caps, at right of logo
 #or: use the word lab, in same font as agrf logo
-galaxy_base_url: https://agrf.usegalaxy.org.au
+galaxy_base_url: https://usegalaxy.org.au
 subdomain: agrf
 root_domain: usegalaxy.org.au
 
@@ -24,10 +24,16 @@ conclusion_md: templates/conclusion.md
 
 sections:
   - sections/data.yml
-  - sections/qualitycontrol.yml
+  - sections/sequencing_qc.yml
   - sections/microbial.yml
-#  - sections/moreanalysis.yml
+  - sections/rnaseq.yml
+  - sections/metagenomics.yml
+  - sections/genomeassembly.yml
+  - sections/gbs.yml
   - sections/learn.yml
-#  - sections/help.yml
+  - sections/help_contact.yml
+#  - sections/moreanalysis.yml
+
+
 #  - sections/agrf.yml
 
diff --git a/agrf/sections/data.yml b/agrf/sections/data.yml
@@ -1,43 +1,37 @@
 id: import
 title: Import data
+
 tabs:
   - id: overview
     title: Overview
-    heading_md: 
+    heading_md:
+
     content:
+
       - title_md: Get AGRF data into Galaxy
         description_md: |
-          * Find your fastq data at the [AGRF Hub](https://www.agrf.org.au/my-agrf-hub). 
-          * Download this data to your computer.
-          * Upload data from your computer to Galaxy.
-          * Tip: only import the files that you need. 
+          * Find your FASTQ data at the [AGRF Hub](https://www.agrf.org.au/my-agrf-hub).
+          * Download the required files to your computer.
+          * Upload data from your computer into Galaxy.
+          * Tip: only import the files that you need for your analysis.
+
+          *Note: Galaxy and the AGRF Hub are not configured for long-term data storage. Ensure raw and processed data are backed up to a secure archive.*
 
-          *Note: Galaxy and the AGRF Hub are not configured for long-term data storage. Ensure you save raw and processed data to a secure archive.*
         button_md: Upload data
         button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=upload1"
+
       - title_md: Compress large files
         description_md: |
-          * After uploading data, we recommend compressing <code>fastq</code> and <code>fasta</code> files. 
-          * This will save you time and storage space. 
+          * After uploading data, we recommend compressing <code>FASTQ</code> and <code>FASTA</code> files.
+          * Compression reduces storage usage and can improve upload and download performance.
+
         button_md: Compress data
-        button_link: "{{ galaxy_base_url }}/tool_runner?tool_id=https://toolshed.g2.bx.psu.edu/repos/iuc/compress_file" #does this need additional text eg %2Frepos% etc?
+        button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fcompress_file%2Fcompress_file%2F0.1.0&version=latest"
+
       # - title_md: Learn how to use Galaxy
       #   description_md: |
-      #     * Galaxy Australia is free to use. 
-      #     * Get started with these user-friendly tutorials. 
+      #     * Galaxy Australia is free to use.
+      #     * Get started with these user-friendly tutorials.
+      #
       #   button_md: Galaxy tutorials
-      #   button_link: "https://training.galaxyproject.org/" 
-
-  - id: help
-    title: Help
-    heading_md: 
-    content:
-
-      - title_md: Galaxy Australia support
-        description_md: Request support, new tools, or additional storage. 
-        button_md: Request support
-        button_link: /request/support
-      - title_md: AGRF support
-        description_md: Contact AGRF for more help with your data. 
-        button_md: Contact AGRF
-        button_link: /request/support
+      #   button_link: "https://training.galaxyproject.org/"
diff --git a/agrf/sections/gbs.yml b/agrf/sections/gbs.yml
@@ -0,0 +1,167 @@
+id: gbs
+title: GBS
+
+tabs:
+  - id: overview
+    title: Overview
+    content:
+      - title_md: About the service
+        description_md: |
+          Genotyping-by-sequencing (GBS) is used to identify genetic variation across multiple samples by sequencing reduced representation regions of the genome generated through restriction enzyme digestion.
+
+          Library preparation is performed using a ddRAD-based protocol, which includes digestion of genomic DNA with restriction enzymes, ligation of barcoded adapters, size selection of fragments, and PCR amplification to generate sequencing-ready libraries.
+
+          Sequencing data is processed using the DeNovoGBS pipeline from NGSEP software. Raw sequencing reads are demultiplexed into individual samples based on barcode sequences. Reads are clustered based on sequence similarity, and variants are detected within each cluster using a probabilistic model.
+
+          Quality control metrics are generated throughout the analysis, including read counts per sample, clustering statistics, and similarity between samples.
+
+          The final outputs include variant calls, consensus sequences for each cluster, demultiplexed sequencing reads, and quality control visualisations, enabling downstream population genetics and diversity analyses.
+
+      - title_md: Sequencing Output
+        description_md: |
+          **Number of samples:** 95 samples following demultiplexing
+
+          Raw sequencing reads are provided as compressed FASTQ files for each individual sample after barcode-based demultiplexing.
+
+      - title_md: What files are included?
+        description_md: |
+
+          | Filename | Description |
+          |----------|------------|
+          | *.fastq.gz | Demultiplexed sequencing reads for each sample |
+          | *_variants.vcf | Variant calls across all samples |
+          | *.fa | Consensus sequences for each read cluster |
+          | QC figures (.jpg) | Quality control visualisations (read counts, clustering, IBS heatmap) |
+
+      - title_md: File formats used
+        description_md: |
+
+          | Type | Description |
+          |------|------------|
+          | .fastq.gz | Demultiplexed sequencing reads per sample |
+          | .vcf | Variant call format for genotype data |
+          | .fa | FASTA format for consensus sequences |
+          | .jpg | Quality control figures |
+
+  - id: tools
+    title: Tools
+    content:
+      subsections:
+        - id: stacks
+          title: STACKS workflow
+          content:
+
+            - title_md: <code>ustacks</code> - Build loci for each sample
+              description_md: |
+                Assemble short-read sequences into putative loci for each sample and identify SNPs within stacks of matching reads. This is the first step in the STACKS workflow and is used to build loci from raw GBS or ddRAD sequencing reads.
+
+                Output: Produces loci assemblies, SNP calls, haplotype information, and model files for each sample (.tags.tsv.gz, .snps.tsv.gz, .alleles.tsv.gz, .models.tsv.gz).
+              inputs:
+                - label: Demultiplexed FASTQ reads for each sample
+                  datatypes:
+                    - fastqsanger
+              outputs:
+                - label: Sample loci and SNP TSV files generated by ustacks
+              button_md: Run ustacks
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_ustacks%2Fstacks2_ustacks"
+
+            - title_md: <code>cstacks</code> - Create catalog of loci
+              description_md: |
+                Create a catalogue of consensus loci by combining loci identified across multiple samples. This catalogue acts as a reference set of loci for downstream matching and population analysis.
+
+                Output: Produces catalogue loci and SNP information shared across samples (.catalog.tags.tsv.gz, .catalog.snps.tsv.gz).
+              inputs:
+                - label: Sample loci files from ustacks (sample.tags.tsv.gz)
+                  datatypes:
+                    - tabular
+              outputs:
+                - label: Catalog of loci and SNP TSV files generated by cstacks
+              button_md: Run cstacks
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_cstacks%2Fstacks2_cstacks"
+
+            - title_md: <code>sstacks</code> - Match samples to catalog
+              description_md: |
+                Match loci from individual samples to the catalogue of consensus loci generated by cstacks. This step identifies shared loci between samples and the catalogue for downstream population analysis.
+
+                Output: Produces catalogue matching files describing loci matches for each sample (.matches.tsv.gz).
+              inputs:
+                - label: Sample loci files from ustacks (sample.tags.tsv.gz)
+                  datatypes:
+                    - tabular
+                - label: Catalog loci file from cstacks (catalog.tags.tsv.gz)
+                  datatypes:
+                    - tabular
+              outputs:
+                - label: Matches-to-catalog TSV files generated by sstacks
+              button_md: Run sstacks
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_sstacks%2Fstacks2_sstacks"
+
+            - title_md: <code>tsv2bam</code> - Convert TSV to BAM
+              description_md: |
+                Convert STACKS locus information into BAM format while incorporating population map information. This step prepares aligned loci information for downstream genotype calling and population genetics analysis.
+
+                Output: Produces BAM alignment files and associated population mapping information (.bam).
+              inputs:
+                - label: Loci and polymorphism files from STACKS (*.tags.tsv.gz, *.snps.tsv.gz, *.alleles.tsv.gz)
+                  datatypes:
+                    - tabular
+                - label: Catalog loci file from cstacks (catalog.tags.tsv.gz)
+                  datatypes:
+                    - tabular
+                - label: Matches-to-catalog files from sstacks (sample.matches.tsv.gz)
+                  datatypes:
+                    - tabular
+              outputs:
+                - label: BAM alignment files generated by tsv2bam
+              button_md: Run tsv2bam
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_tsv2bam%2Fstacks2_tsv2bam"
+
+            - title_md: <code>gstacks</code> - Assemble loci and call variants
+              description_md: |
+                Assemble loci, align reads, and perform SNP and genotype calling across all samples in the population. This step generates population-level variant information from aligned STACKS data.
+
+                Output: Produces genotype calls, assembled loci, and population-level SNP information (.vcf, .tsv.gz).
+              inputs:
+                - label: BAM alignment files generated by tsv2bam
+                  datatypes:
+                    - bam
+              outputs:
+                - label: Variant calls (VCF) and assembled loci generated by gstacks
+              button_md: Run gstacks
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_gstacks%2Fstacks2_gstacks"
+
+            - title_md: <code>populations</code> - Population genetics analysis
+              description_md: |
+                Perform population genetics analysis and export filtered variant datasets for downstream analysis. This tool calculates population statistics, applies locus filtering, and prepares data for diversity and population structure analysis.
+
+                Output: Produces filtered SNP datasets, population statistics, and export files for downstream analysis (.vcf, .tsv, .structure, .phylip).
+              inputs:
+                - label: Variant calls and assembled loci generated by gstacks
+                  datatypes:
+                    - vcf
+              outputs:
+                - label: Population statistics and export files generated by populations
+              button_md: Run populations
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fstacks2_populations%2Fstacks2_populations"
+
+            - title_md: <code>bcftools filter</code> - Filter variant data
+              description_md: |
+                Filter variant call files (VCF) based on minor allele frequency (MAF), missing data thresholds, depth, and quality metrics. This step helps retain high-confidence variants for downstream population genetics and diversity analysis.
+
+                Output: Produces filtered variant datasets suitable for downstream analysis (.vcf).
+              inputs:
+                - label: Variant call file (VCF) generated by NGSEP, gstacks, or another variant calling workflow
+                  datatypes:
+                    - vcf
+              outputs:
+                - label: Filtered VCF file for downstream population analysis
+              button_md: Run bcftools filter
+              button_link: "{{ galaxy_base_url }}/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fiuc%2Fbcftools_filter%2Fbcftools_filter"
+
+  - id: tutorials
+    title: Tutorials
+    content:
+      - title_md: GBS analysis tutorials
+        description_md: Explore Galaxy Training Network tutorials for variant analysis workflows relevant to GBS and population genomics studies.
+        button_md: Tutorials
+        button_link: https://training.galaxyproject.org/training-material/topics/variant-analysis/