Mercurial > repos > earlhaminst > lotus2
changeset 2:cf56a6553385 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/lotus2 commit 4f154640ea9b8d9472307287f0ee6483649c9466"
author | earlhaminst |
---|---|
date | Wed, 19 May 2021 19:15:08 +0000 |
parents | 85da3173a488 |
children | 77cb867e9608 |
files | lotus2.xml test-data/mapping.txt tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 129 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/lotus2.xml Wed May 19 02:38:24 2021 +0000 +++ b/lotus2.xml Wed May 19 19:15:08 2021 +0000 @@ -1,7 +1,7 @@ <tool id="lotus2" name="LotuS2" version="@VERSION@" profile="20.01"> <description>fast OTU processing pipeline</description> <macros> - <token name="@VERSION@">2.05.1</token> + <token name="@VERSION@">2.06</token> <xml name="refDB_macro"> <param argument="-refDB" type="select" label="Reference Database"> <option value="SLV" selected="true">Silva LSU (23/28S) or SSU (16/18S) (SLV)</option> @@ -11,6 +11,10 @@ <option value="beetax">Bee gut specific database and tax names (beetax)</option> <option value="HITdb">Human gut microbiota (HITdb)</option> </param> + <param argument="-useBestBlastHitOnly" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use the best Blast hit only" help="Do not use LCA (lowest common ancestor) to determine the most likely taxonomic level (not recommended)" /> + </xml> + <xml name="id_macro"> + <param argument="-id" type="float" min="0" max="1" value="0.97" label="Clustering threshold for OTUs" /> </xml> </macros> <requirements> @@ -18,12 +22,28 @@ </requirements> <version_command>lotus2 --version</version_command> <command detect_errors="exit_code"><![CDATA[ +#import os.path +#import re +#def symlink_basename($f): + #set fn = re.sub('[^\w\-_.]', '_', $f.element_identifier) + #if fn.endswith('.gz'): + #set fn = fn[:-3] + #end if + #for ext in ('.fq', '.fastq', '.fastqsanger'): + #if fn.endswith($ext): + #set fn = fn[:-len($ext)] + #break + #end if + #end for +$fn#slurp +#end def + mkdir input && #if $inputs.paired_or_single == 'single': - #for i, f in enumerate($inputs.input): + #for f in $inputs.input: #set ext = $f.ext.replace('sanger', '') - ln -s '$f' 'input/input${i}.${ext}' && + ln -s '$f' 'input/${symlink_basename(f)}.${ext}' && #end for #elif $inputs.paired_or_single == 'paired': #for i, f in enumerate($inputs.left_input): @@ -35,11 +55,11 @@ ln -s '$f' 'input/input${i}.2.${ext}' && #end for #else: - #for i, 
f in enumerate($inputs.pair_input): + #for f in $inputs.pair_input: #set ext = $f.forward.ext.replace('sanger', '') - ln -s '$f.forward' 'input/input${i}.1.${ext}' && + ln -s '$f.forward' 'input/${symlink_basename(f)}.1.${ext}' && #set ext = $f.reverse.ext.replace('sanger', '') - ln -s '$f.reverse' 'input/input${i}.2.${ext}' && + ln -s '$f.reverse' 'input/${symlink_basename(f)}.2.${ext}' && #end for #end if @@ -62,9 +82,16 @@ #if $reversePrimer: -reversePrimer '$reversePrimer' #end if +#if $offtarget_cond.offtargetDB != 'no': + -offtargetDB '$offtarget_cond.ref_file' +#end if --clustering $clu_args.clustering --id $clu_args.id +-clustering $clu_args.clu_cond.clustering +#if $clu_args.clu_cond.clustering in ('1', '3'): + -id $clu_args.clu_cond.id +#elif $clu_args.clu_cond.clustering == '2': + -swarm_distance $clu_args.clu_cond.swarm_distance +#end if #if $clu_args.derepMin: -derepMin '$clu_args.derepMin' #end if @@ -79,6 +106,7 @@ -utax_thr $tax_args.aligner_cond.utax_thr #else: -refDB $tax_args.aligner_cond.refDB + -useBestBlastHitOnly $tax_args.aligner_cond.useBestBlastHitOnly #end if -amplicon_type $tax_args.amplicon_type -tax_group $tax_args.tax_group @@ -87,6 +115,8 @@ -LCA_cover $tax_args.LCA_cover -LCA_frac $tax_args.LCA_frac -greengenesSpecies $tax_args.greengenesSpecies +-lulu $tax_args.lulu +-buildPhylo $tax_args.buildPhylo ; EXIT_VALUE=\$? 
; @@ -99,16 +129,17 @@ <conditional name="inputs"> <param name="paired_or_single" type="select" label="Paired or Single-end data?"> <option value="single" selected="true">Single-end</option> - <option value="paired">Paired-end</option> <option value="paired_collection">Paired-end collection</option> </param> <when value="single"> <param name="input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Single-end reads" /> </when> +<!-- <when value="paired"> <param name="left_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Left/Forward strand reads" /> <param name="right_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Right/Reverse strand reads" /> </when> +--> <when value="paired_collection"> <param name="pair_input" type="data_collection" collection_type="list:paired" format="fastqsanger,fastqsanger.gz" label="List of paired reads" /> </when> @@ -122,33 +153,66 @@ <param argument="-barcode" type="data" format="fastqsanger" optional="true" label="Barcode (MID) sequences (optional)" help="FASTQ file with barcodes (in the processed mi/hiSeq format), if provided by the sequencer" /> <param argument="-forwardPrimer" type="text" value="" label="Forward primer used to amplify DNA region" help="E.g. 16S primer fwd" /> <param argument="-reversePrimer" type="text" value="" label="Reverse primer used to amplify DNA region" help="E.g. 
16S primer rev" /> + <conditional name="offtarget_cond"> + <param argument="-offtargetDB" type="select" label="Remove likely contaminant OTUs/ASVs based on alignment to host genome" help="Useful for low-bacterial biomass samples to remove possible host genome contaminations"> + <option value="no" selected="true">Disabled</option> + <option value="cached">Use a built-in genome</option> + <option value="history">Use a genome from history</option> + </param> + <when value="no" /> + <when value="cached"> + <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> + <options from_data_table="all_fasta"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No reference genomes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="FASTA reference genome" /> + </when> + </conditional> <section name="clu_args" title="Clustering Options"> - <param argument="-clustering" type="select" label="Clustering algorithm"> - <option value="1">UPARSE</option> - <option value="2">swarm</option> - <option value="3">cd-hit</option> - <option value="6">unoise3</option> - <option value="7" selected="true">dada2</option> - </param> - <param argument="-id" type="float" min="0" max="1" value="0.97" label="Clustering threshold for OTUs" /> - <param argument="-derepMin" type="text" value="" label="Minimum size of dereplicated raw reads" help="E.g. 4:1,4:2,3:3 . 
See http://lotus2.earlham.ac.uk/images/Derep_options.pdf for how to specify this parameter" /> + <conditional name="clu_cond"> + <param argument="-clustering" type="select" label="Clustering algorithm"> + <option value="1">UPARSE</option> + <option value="2">swarm</option> + <option value="3">cd-hit</option> + <option value="6">unoise3</option> + <option value="7" selected="true">dada2</option> + </param> + <when value="1"> + <expand macro="id_macro" /> + </when> + <when value="2"> + <param argument="-swarm_distance" type="integer" min="1" value="1" label="Clustering threshold for OTUs when using swarm clustering" /> + </when> + <when value="3"> + <expand macro="id_macro" /> + </when> + <when value="6"> + </when> + <when value="7"> + </when> + </conditional> + <param argument="-derepMin" type="text" value="" label="Minimum size of dereplicated raw reads (optional)" help="E.g. 4:1,4:2,3:3 . See http://lotus2.earlham.ac.uk/images/Derep_options.pdf for how to specify this parameter. If not specified, LotuS2 will select an appropriate default for the chosen clustering algorithm." 
/> <param argument="-deactivateChimeraCheck" type="select" label="Chimera check"> <option value="0" selected="true">OTU chimera checks</option> <option value="1">No chimera check at all</option> - <option value="2">Deactivate deNovo chimera check</option> - <option value="3">Deactivate ref based chimera check</option> + <option value="2">Disable deNovo chimera check</option> + <option value="3">Disable ref based chimera check</option> </param> <param argument="-chim_skew" type="integer" min="0" value="2" label="Skew in chimeric fragment abundance" /> <param argument="-readOverlap" type="integer" min="0" value="300" label="Maximum number of basepairs that two reads are overlapping" /> </section> <section name="tax_args" title="Taxonomy Options"> <conditional name="aligner_cond"> - <param argument="-taxAligner" type="select" label="Taxonomy aligner"> - <option value="0" selected="true">Deactivated (just use RDP)</option> - <option value="1">Blast</option> - <option value="2">Use LAMBDA to search against a 16S reference database for taxonomic profiling of OTUs</option> - <option value="3">Use UTAX with custom databases</option> - <option value="4">Use VSEARCH to align OTUs to custom databases</option> + <param argument="-taxAligner" type="select" label="Taxonomy aligner for taxonomic profiling of OTUs"> + <option value="0" selected="true">RDPclassifier (max likelihood)</option> + <option value="1">Blast LCA against custom reference database</option> + <option value="2">LAMBDA LCA against custom reference database</option> + <option value="3">UTAX likelihood corrected</option> + <option value="4">VSEARCH LCA against custom reference database</option> </param> <when value="0"> <param argument="-rdp_thr" type="float" min="0" max="1" value="0.8" label="Confidence threshold for RDP"/> @@ -182,6 +246,12 @@ <param argument="-LCA_cover" type="float" min="0" max="1" value="0.9" label="Minimum horizontal coverage of an OTU sequence against ref DB"/> <param argument="-LCA_frac" 
type="float" min="0" max="1" value="0.9" label="Minimum fraction of reads with identical taxonomy"/> <param argument="-greengenesSpecies" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Create greengenes output labels instead of OTU" /> + <param argument="-lulu" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use LULU to merge OTUs based on their occurence" /> + <param argument="-buildPhylo" type="select" label="Build OTU phylogeny"> + <option value="0">Disable</option> + <option value="1" selected="true">Use fasttree2</option> + <option value="2">Use iqtree2</option> + </param> </section> </inputs> @@ -190,9 +260,8 @@ <data name="otu_biom" format="biom" label="${tool.name} on ${on_string}: biom-formatted OTU abundance matrix" from_work_dir="output/OTU.biom" /> <data name="otu_fna" format="fasta" label="${tool.name} on ${on_string}: FASTA-formatted extended OTU seed sequences" from_work_dir="output/OTU.fna" /> <data name="OTUphylo_nwk" format="newick" label="${tool.name} on ${on_string}: Newick-formatted phylogenetic tree between sequences" from_work_dir="output/OTUphylo.nwk" /> - <data name="hiera_blast" format="tabular" label="${tool.name} on ${on_string}: OTU taxonomy assignments based on Blastn" from_work_dir="output/hiera_BLAST.txt" /> - <data name="hiera_rdp" format="tabular" label="${tool.name} on ${on_string}: OTU taxonomy assignments based on RDP classifier" from_work_dir="output/hiera_RDP.txt" /> - <data name="primary" format="tar" label="${tool.name} on ${on_string}: All output files" from_work_dir="output.tar.gz" /> + <data name="mapping" format="tabular" label="${tool.name} on ${on_string}: mapping file" from_work_dir="output/primary/in.map" /> + <data name="outputs" format="tar" label="${tool.name} on ${on_string}: All output files" from_work_dir="output.tar.gz" /> </outputs> <tests> @@ -203,7 +272,7 @@ <param name="clustering" value="3" /> <output name="otu" file="OTU.txt" compare="sim_size" /> <output 
name="otu_fna" file="OTU.fna" compare="sim_size" /> - <output name="hiera_rdp" file="hiera_RDP.txt" compare="sim_size" /> + <output name="mapping" file="mapping.txt" /> </test> </tests>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mapping.txt Wed May 19 19:15:08 2021 +0000
@@ -0,0 +1,3 @@
+#SampleID fastqFile SequencingRun
+SMPL0 Anh_sample1.fastq.gz a
+SMPL1 Anh_sample2.fastq.gz a
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Wed May 19 19:15:08 2021 +0000
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 19 19:15:08 2021 +0000
@@ -0,0 +1,9 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
+