changeset 0:6613ca06dd56 draft default tip

planemo upload for repository https://github.com/bluenote-1577/fairy commit bfba48480e14ad877a96476ef1a9d6973e1129f6
author iuc
date Sun, 12 Jan 2025 19:00:17 +0000
parents
children
files fairy_cov.xml fairy_sketch.xml macros.xml test-data/forward.paired.bcsp test-data/normal_test.tsv test-data/single_test.fasta.gz test-data/single_test.fasta.gz.bcsp test-data/test_2.tsv test-data/test_3.tsv test-data/test_paired_1.fq.gz test-data/test_paired_1.fq.gz.paired.bcsp test-data/test_paired_2.fq.gz
diffstat 12 files changed, 254 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fairy_cov.xml	Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,114 @@
+<tool id="fairy_cov" name="Fairy coverage" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Create coverage file for specific binners</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+        <![CDATA[
+            #import re
+
+            ## Derive file-system friendly link names from the element identifiers.
+            #set $unsafe_chars = '[^\s\w\-\\.]'
+            #set $contig_link = re.sub($unsafe_chars, '_', str($contig.element_identifier))
+            #set $sketch_link = re.sub($unsafe_chars, '_', str($bcsp_file.element_identifier))
+
+            ln -s '$contig' '$contig_link' &&
+            ln -s '$bcsp_file' '$sketch_link' &&
+
+            fairy coverage
+            '$contig_link'
+            '$sketch_link'
+            -t "\${GALAXY_SLOTS:-3}"
+            -m $minimum_ani
+            -M $min_number_kmers
+            -c $c
+            -k $k
+            --min-spacing $min_spacing
+            $full_contig_name
+            ## MetaBAT2 ('meta') is fairy's default layout and needs no extra flag.
+            #if $output_type == 'semi':
+                --aemb-format
+            #else if $output_type == 'max':
+                --maxbin-format
+            #end if
+            -o '$output'
+        ]]>
+    </command>
+    <inputs><!-- Numeric options are mandatory (each has a default) because the command line always emits their flags. -->
+        <param name="contig" type="data" format="fasta,fasta.gz" label="Input fasta contig file" help="Input the RAW FASTA contig file. It can be gzip!"/>
+        <param name="bcsp_file" type="data" format="bcsp" label="Input the pre-sketched file (.bcsp file)" help="This file will be generated with the fairy sketch tool."/>
+        <param argument="--minimum-ani" type="integer" min="0" max="100" value="95" label="Set minimum ANI" help="Set the minimum adjusted ANI for the coverage calculation"/>
+        <param argument="--min-number-kmers" type="integer" value="8" label="Genome filter" help="Filter out genomes with fewer than x k-mers sampled."/>
+        <param argument="-c" type="integer" value="50" label="Set subsampling rate" help="This value does not interact with the .bcsp file which was used as input."/>
+        <param argument="-k" type="select" label="Select k-mer size" help="This value does not interact with the .bcsp file which was used as input.">
+            <option value="31">31</option>
+            <option value="21">21</option>
+        </param>
+        <param argument="--min-spacing" type="integer" value="30" label="Set spacing between k-mers" help="Minimum spacing between selected k-mers on the contigs."/>
+        <param argument="--full-contig-name" type="boolean" falsevalue="" truevalue="--full-contig-name" label="Full contig name"
+            help="When a contig has a space in its name, this option uses the full name instead of only the part up to the first space"/>
+        <param name="output_type" type="select" label="Select for which binner the output should be generated">
+            <option value="meta">MetaBAT2</option>
+            <option value="semi">SemiBin2</option>
+            <option value="max">MaxBin2</option>
+        </param>
+    </inputs>
+    <outputs><!-- The coverage table that the command writes through -o '$output'. -->
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests><!-- Test params use the names Galaxy derives from "argument" (dashes become underscores); regenerate the expected files if a run now differs. -->
+        <test>
+            <param name="contig" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            <param name="bcsp_file" value="single_test.fasta.gz.bcsp" ftype="bcsp"/>
+            <output name="output" file="normal_test.tsv"/>
+        </test>
+        <test>
+            <param name="contig" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            <param name="bcsp_file" value="single_test.fasta.gz.bcsp" ftype="bcsp"/>
+            <param name="minimum_ani" value="99"/>
+            <param name="min_number_kmers" value="2"/>
+            <param name="full_contig_name" value="true"/>
+            <param name="output_type" value="semi"/>
+            <output name="output" file="test_2.tsv"/>
+        </test>
+        <test>
+            <param name="contig" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            <param name="bcsp_file" value="single_test.fasta.gz.bcsp" ftype="bcsp"/>
+            <param name="k" value="21"/>
+            <param name="c" value="45"/>
+            <param name="min_spacing" value="10"/>
+            <param name="output_type" value="max"/>
+            <output name="output" file="test_3.tsv"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+            Fairy computes multi-sample contig coverage for metagenome-assembled genome (MAG) binning.
+
+            Fairy is used after metagenomic assembly and before binning. It can
+
+            - Calculate coverage 100x-1000x faster than read alignment (e.g. BWA)
+            - Give comparable bins for multi-sample binning (short read or nanopore reads)
+            - Output formats that are compatible with MetaBAT2, MaxBin2, SemiBin2, and more
+            
+            Caveats:
+
+            - Don't use fairy for single-sample binning
+            - Don't use fairy for PacBio HiFi
+
+            For more information visit `the wiki site on GitHub <https://github.com/bluenote-1577/fairy/wiki/Introduction-to-fairy>`_.
+
+            .. class:: infomark
+
+            Fairy usage for SemiBin2 is different than other tools: SemiBin2 requires separate coverage files for each read sample -- other tools require a single coverage matrix.
+
+            .. class:: infomark
+
+            The default output format from Fairy is the MetaBAT2 format. Any tool that uses this format, or the format of one of the other two binners, also works with Fairy's coverage files!
+        
+        ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fairy_sketch.xml	Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,118 @@
+<tool id="fairy_sketch" name="Fairy sketch" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>sketching of k-mers for coverage into a hashtable</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+        <![CDATA[
+
+            #import re
+
+            mkdir -p res &&
+
+            #if $input.is_select == "single":
+                #set $file = re.sub('[^\s\w\-\\.]', '_', str($input.reads.element_identifier))
+                #set $filename = $file + '.bcsp'
+                ln -s '$input.reads' '$file' &&
+            #else if $input.is_select == 'pair':
+                #set $file_1 = re.sub('[^\s\w\-\\.]', '_', str($input.first_pairs.element_identifier))
+                #set $file_2 = re.sub('[^\s\w\-\\.]', '_', str($input.second_pairs.element_identifier))
+                ## fairy names the sketch after the first input file, so that name is needed to copy the right file afterwards
+                #set $filename = $file_1 + '.paired.bcsp'
+                ln -s '$input.first_pairs' '$file_1' &&
+                ln -s '$input.second_pairs' '$file_2' &&
+            #else
+                #set $file_1 = re.sub('[^\s\w\-\\.]', '_', str($input.paired_collection.forward.element_identifier))
+                #set $file_2 = re.sub('[^\s\w\-\\.]', '_', str($input.paired_collection.reverse.element_identifier))
+                ## fairy names the sketch after the first input file, so that name is needed to copy the right file afterwards
+                #set $filename = $file_1 + '.paired.bcsp'
+                ln -s '$input.paired_collection.forward' '$file_1' &&
+                ln -s '$input.paired_collection.reverse' '$file_2' &&
+            #end if
+
+            fairy sketch
+            -t "\${GALAXY_SLOTS:-3}"
+            -c ${c}
+            -k ${k}
+            -d 'res'
+            #if $input.is_select == "single":
+                -r '$file'
+            #else
+                -1 '$file_1'
+                -2 '$file_2'
+            #end if
+            &&
+
+            cp './res/${filename}' '$output'
+
+        ]]>
+    </command>
+    <inputs><!-- The subsampling rate is mandatory (it has a default) because the command line always emits "-c". -->
+        <conditional name="input">
+            <param name="is_select" type="select" label="Single or paired-end reads">
+                <option value="single">Single</option>
+                <option value="pair">Paired</option>
+                <option value="paired_collection">Paired collection</option>
+            </param>
+            <when value="single">
+                <param argument="--reads" type="data" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" label="Input single-end reads"/>
+            </when>
+            <when value="pair">
+                <param argument="--first_pairs" type="data" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" label="Input first paired-end reads"/>
+                <param argument="--second_pairs" type="data" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" label="Input second paired-end reads"/>
+            </when>
+            <when value="paired_collection">
+                <param name="paired_collection" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" type="data_collection" collection_type="paired" label="Input paired collection reads"/>
+            </when>
+        </conditional>
+        <param argument="-c" type="integer" value="50" label="Set the subsampling rate"/>
+        <param argument="-k" type="select" label="Select k-mer size">
+            <option value="31">31</option>
+            <option value="21">21</option>
+        </param>
+    </inputs>
+    <outputs><!-- The sketch that the command copies from ./res/ to '$output'. -->
+        <data name="output" format="bcsp" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test><!-- single-end input -->
+            <conditional name="input">
+                <param name="is_select" value="single"/>
+                <param name="reads" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            </conditional>
+            <output name="output" file="single_test.fasta.gz.bcsp"/>
+        </test>
+        <test><!-- paired-end input given as two separate datasets -->
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="first_pairs" value="test_paired_1.fq.gz" ftype="fastq.gz"/>
+                <param name="second_pairs" value="test_paired_2.fq.gz" ftype="fastq.gz"/>
+            </conditional>
+            <output name="output" file="test_paired_1.fq.gz.paired.bcsp"/>
+        </test>
+        <test><!-- paired-end input given as a paired collection -->
+            <conditional name="input">
+                <param name="is_select" value="paired_collection"/>
+                <param name="paired_collection">
+                    <collection type="paired">
+                        <element name="forward" value="test_paired_1.fq.gz" ftype="fastq.gz" />
+                        <element name="reverse" value="test_paired_2.fq.gz" ftype="fastq.gz" />
+                    </collection>
+                </param>    
+            </conditional>
+            <output name="output" file="forward.paired.bcsp"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+        This tool sketches the k-mers of the reads into a hashtable, which the fairy coverage tool needs to create the coverage file.
+
+        .. class:: infomark
+
+            This tool can either use single-end or paired-end reads as input in multiple file formats.
+        ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,17 @@
+<macros><!-- Shared tokens and macros imported by the fairy tool definitions. -->
+    <token name="@TOOL_VERSION@">0.5.7</token><!-- version of the fairy package -->
+    <token name="@VERSION_SUFFIX@">0</token><!-- appended to the tool version after "+galaxy" -->
+    <token name="@PROFILE@">24.1</token><!-- value of the tools' profile attribute -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fairy</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2024.04.23.590803</citation>
+            <yield/>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
Binary file test-data/forward.paired.bcsp has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normal_test.tsv	Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,2 @@
+contigName	contigLen	totalAvgDepth	single_test.fasta.gz	single_test.fasta.gz-var
+NZ_CP017438.1	3123040	0.05509718146076748	0.05509718146076748	0.014618167653679848
Binary file test-data/single_test.fasta.gz has changed
Binary file test-data/single_test.fasta.gz.bcsp has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2.tsv	Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,1 @@
+NZ_CP017438.1	0.05509718146076748
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_3.tsv	Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,2 @@
+contigName
+NZ_CP017438.1
Binary file test-data/test_paired_1.fq.gz has changed
Binary file test-data/test_paired_1.fq.gz.paired.bcsp has changed
Binary file test-data/test_paired_2.fq.gz has changed