Mercurial > repos > iuc > fairy
changeset 0:6613ca06dd56 draft default tip
planemo upload for repository https://github.com/bluenote-1577/fairy commit bfba48480e14ad877a96476ef1a9d6973e1129f6
author | iuc |
---|---|
date | Sun, 12 Jan 2025 19:00:17 +0000 |
parents | |
children | |
files | fairy_cov.xml fairy_sketch.xml macros.xml test-data/forward.paired.bcsp test-data/normal_test.tsv test-data/single_test.fasta.gz test-data/single_test.fasta.gz.bcsp test-data/test_2.tsv test-data/test_3.tsv test-data/test_paired_1.fq.gz test-data/test_paired_1.fq.gz.paired.bcsp test-data/test_paired_2.fq.gz |
diffstat | 12 files changed, 254 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fairy_cov.xml Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,129 @@
+<tool id="fairy_cov" name="Fairy coverage" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Create coverage file for specific binners</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        #import re
+
+        ## Symlink both inputs under a sanitised copy of their element identifier:
+        ## anything that is not whitespace, a word character, a hyphen or a dot becomes an underscore.
+        #set $file = re.sub('[^\s\w\-\\.]', '_', str($contig.element_identifier))
+        #set $bcsp = re.sub('[^\s\w\-\\.]', '_', str($bcsp_file.element_identifier))
+
+        ln -s '$contig' '$file' &&
+        ln -s '$bcsp_file' '$bcsp' &&
+
+        fairy coverage
+        '$file'
+        '$bcsp'
+        -t "\${GALAXY_SLOTS:-3}"
+        ## The next three parameters are optional: emit a flag only when a value was given,
+        ## otherwise the flag would reach fairy without its argument.
+        #if str($minimum_ani) != '':
+            -m ${minimum_ani}
+        #end if
+        #if str($min_number_kmers) != '':
+            -M ${min_number_kmers}
+        #end if
+        #if str($c) != '':
+            -c ${c}
+        #end if
+        -k ${k}
+        --min-spacing ${min_spacing}
+        ${full_contig_name}
+        ## No extra flag is passed for the MetaBAT2 choice.
+        #if $output_type == 'semi':
+            --aemb-format
+        #else if $output_type == 'max':
+            --maxbin-format
+        #end if
+        -o '$output'
+
+    ]]>
+    </command>
+    <inputs>
+        <param name="contig" type="data" format="fasta,fasta.gz" label="Input fasta contig file" help="Input the RAW FASTA contig file. It can be gzip!"/>
+        <param name="bcsp_file" type="data" format="bcsp" label="Input the pre-sketched file (.bcsp file)" help="This file will be generated with the fairy sketch tool."/>
+        <param argument="--minimum-ani" type="integer" optional="true" min="0" max="100" value="95" label="Set minimum ANI" help="Set the minimum adjusted ANI for the coverage calculation"/>
+        <param argument="--min-number-kmers" type="integer" value="8" optional="true" label="Genome filter" help="Filter out genomes with fewer than x k-mers sampled."/>
+        <param argument="-c" type="integer" value="50" optional="true" label="Set subsampling rate" help="This value does not interact with the .bcsp file which was used as input."/>
+        <param argument="-k" type="select" label="Select k-mer size" help="This value does not interact with the .bcsp file which was used as input.">
+            <option value="31">31</option>
+            <option value="21">21</option>
+        </param>
+        <param argument="--min-spacing" type="integer" value="30" label="Set spacing between k-mers" help=" Minimum spacing between selected k-mers on the contigs."/>
+        <param argument="--full-contig-name" type="boolean" falsevalue="" truevalue="--full-contig-name" label="Full contig name"
+            help="When a contig has a space in its name this option allows to use the full name instead of only the name till the first space"/>
+        <param name="output_type" type="select" label="Select for which binner the output should be generated">
+            <option value="meta">MetaBAT2</option>
+            <option value="semi">SemiBin2</option>
+            <option value="max">MaxBin2</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <!-- Defaults, MetaBAT2-style output -->
+        <test>
+            <param name="contig" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            <param name="bcsp_file" value="single_test.fasta.gz.bcsp" ftype="bcsp"/>
+            <output name="output" file="normal_test.tsv"/>
+        </test>
+        <!-- SemiBin2-style output. Parameters are addressed by the names Galaxy derives from
+             "argument" (leading dashes dropped, remaining dashes turned into underscores);
+             re-check test_2.tsv if it was generated while these values were not applied. -->
+        <test>
+            <param name="contig" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            <param name="bcsp_file" value="single_test.fasta.gz.bcsp" ftype="bcsp"/>
+            <param name="minimum_ani" value="99"/>
+            <param name="min_number_kmers" value="2"/>
+            <param name="full_contig_name" value="true"/>
+            <param name="output_type" value="semi"/>
+            <output name="output" file="test_2.tsv"/>
+        </test>
+        <!-- MaxBin2-style output with non-default k, c and min_spacing; re-check test_3.tsv likewise -->
+        <test>
+            <param name="contig" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            <param name="bcsp_file" value="single_test.fasta.gz.bcsp" ftype="bcsp"/>
+            <param name="k" value="21"/>
+            <param name="c" value="45"/>
+            <param name="min_spacing" value="10"/>
+            <param name="output_type" value="max"/>
+            <output name="output" file="test_3.tsv"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+
+Fairy computes multi-sample contig coverage for metagenome-assembled genome (MAG) binning.
+
+Fairy is used after metagenomic assembly and before binning. It can
+
+- Calculate coverage 100x-1000x faster than read alignment (e.g. BWA)
+- Give comparable bins for multi-sample binning (short read or nanopore reads)
+- Output formats that are compatible with MetaBAT2, MaxBin2, SemiBin2, and more
+
+Caveats:
+
+- Don't use fairy for single-sample binning
+- Don't use fairy for PacBio HiFi
+
+For more information visit `the wiki site on GitHub <https://github.com/bluenote-1577/fairy/wiki/Introduction-to-fairy>`_.
+
+.. class:: infomark
+
+Fairy usage for SemiBin2 is different than other tools: SemiBin2 requires separate coverage files for each read sample -- other tools require a single coverage matrix.
+
+.. class:: infomark
+
+The default output format from Fairy is the MetaBAT2 format. Any tool using this or the format from the other 2 binners work also with Fairy's coverage files!
+
+    ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fairy_sketch.xml Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,123 @@
+<tool id="fairy_sketch" name="Fairy sketch" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>sketching of k-mers for coverage into a hashtable</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        #import re
+
+        mkdir -p res &&
+
+        ## Symlink the reads under a sanitised copy of their element identifier (anything that is not
+        ## whitespace, a word character, a hyphen or a dot becomes an underscore) and remember the sketch name.
+        #if $input.is_select == "single":
+            #set $file = re.sub('[^\s\w\-\\.]', '_', str($input.reads.element_identifier))
+            #set $filename = $file + '.bcsp'
+            ln -s '$input.reads' '$file' &&
+        #else if $input.is_select == 'pair':
+            #set $file_1 = re.sub('[^\s\w\-\\.]', '_', str($input.first_pairs.element_identifier))
+            #set $file_2 = re.sub('[^\s\w\-\\.]', '_', str($input.second_pairs.element_identifier))
+            ## fairy names the sketch after the first input file, so that name is needed to cp the right file
+            #set $filename = $file_1 + '.paired.bcsp'
+            ln -s '$input.first_pairs' '$file_1' &&
+            ln -s '$input.second_pairs' '$file_2' &&
+        #else
+            #set $file_1 = re.sub('[^\s\w\-\\.]', '_', str($input.paired_collection.forward.element_identifier))
+            #set $file_2 = re.sub('[^\s\w\-\\.]', '_', str($input.paired_collection.reverse.element_identifier))
+            ## fairy names the sketch after the first input file, so that name is needed to cp the right file
+            #set $filename = $file_1 + '.paired.bcsp'
+            ln -s '$input.paired_collection.forward' '$file_1' &&
+            ln -s '$input.paired_collection.reverse' '$file_2' &&
+        #end if
+
+        fairy sketch
+        -t "\${GALAXY_SLOTS:-3}"
+        ## The subsampling rate is optional: emit the flag only when a value was given.
+        #if str($c) != '':
+            -c ${c}
+        #end if
+        -k ${k}
+        -d 'res'
+        #if $input.is_select == "single":
+            -r '$file'
+        #else
+            -1 '$file_1'
+            -2 '$file_2'
+        #end if
+        &&
+
+        cp './res/${filename}' '$output'
+
+    ]]>
+    </command>
+    <inputs>
+        <conditional name="input">
+            <param name="is_select" type="select" label="Single or paired-end reads">
+                <option value="single">Single</option>
+                <option value="pair">Paired</option>
+                <option value="paired_collection">Paired collection</option>
+            </param>
+            <when value="single">
+                <param argument="--reads" type="data" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" label="Input single-end reads"/>
+            </when>
+            <when value="pair">
+                <param argument="--first_pairs" type="data" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" label="Input first paired-end reads"/>
+                <param argument="--second_pairs" type="data" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" label="Input second paired-end reads"/>
+            </when>
+            <when value="paired_collection">
+                <param name="paired_collection" format="fastqsanger,fasta,fastq,fasta.gz,fastq.gz" type="data_collection" collection_type="paired" label="Input paired collection reads"/>
+            </when>
+        </conditional>
+        <param argument="-c" type="integer" value="50" optional="true" label="Set the subsampling rate"/>
+        <param argument="-k" type="select" label="Select k-mer size">
+            <option value="31">31</option>
+            <option value="21">21</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="bcsp" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="input">
+                <param name="is_select" value="single"/>
+                <param name="reads" value="single_test.fasta.gz" ftype="fasta.gz"/>
+            </conditional>
+            <output name="output" file="single_test.fasta.gz.bcsp"/>
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="first_pairs" value="test_paired_1.fq.gz" ftype="fastq.gz"/>
+                <param name="second_pairs" value="test_paired_2.fq.gz" ftype="fastq.gz"/>
+            </conditional>
+            <output name="output" file="test_paired_1.fq.gz.paired.bcsp"/>
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="is_select" value="paired_collection"/>
+                <param name="paired_collection">
+                    <collection type="paired">
+                        <element name="forward" value="test_paired_1.fq.gz" ftype="fastq.gz" />
+                        <element name="reverse" value="test_paired_2.fq.gz" ftype="fastq.gz" />
+                    </collection>
+                </param>
+            </conditional>
+            <output name="output" file="forward.paired.bcsp"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+
+This tool sketches the k-mers into a hashtable which will be needed for the fairy coverage tool to create the coverage file.
+
+.. class:: infomark
+
+This tool can either use single-end or paired-end reads as input in multiple file formats.
+    ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Jan 12 19:00:17 2025 +0000
@@ -0,0 +1,20 @@
+<macros>
+    <!-- Version tokens substituted into the tool wrappers that import this file -->
+    <token name="@TOOL_VERSION@">0.5.7</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">24.1</token>
+    <!-- Package requirement; an expanding tool may append further requirements through yield -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fairy</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Citation; an expanding tool may append further citations through yield -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2024.04.23.590803</citation>
+            <yield/>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/normal_test.tsv Sun Jan 12 19:00:17 2025 +0000 @@ -0,0 +1,2 @@ +contigName contigLen totalAvgDepth single_test.fasta.gz single_test.fasta.gz-var +NZ_CP017438.1 3123040 0.05509718146076748 0.05509718146076748 0.014618167653679848
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_2.tsv Sun Jan 12 19:00:17 2025 +0000 @@ -0,0 +1,1 @@ +NZ_CP017438.1 0.05509718146076748