Mercurial > repos > iuc > snippy
changeset 5:0aa87d97847f draft
"planemo upload commit 13d17dd18915767d3ca5bbd92ce3e5e80a287112"
author | iuc |
---|---|
date | Fri, 13 Sep 2019 05:45:21 -0400 |
parents | 9bccc8404a3c |
children | 4df82423ba61 |
files | macros.xml snippy-core.xml snippy.xml test-data/a_fna_ref_mincov_2_minqual_60.snps.txt test-data/all_fasta.loc test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff test-data/b_fna_ref_mincov_2_minqual_60.snps.gff test-data/b_fna_ref_mincov_2_minqual_60.snps.txt tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 11 files changed, 171 insertions(+), 46 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu Jul 11 09:41:13 2019 -0400 +++ b/macros.xml Fri Sep 13 05:45:21 2019 -0400 @@ -10,7 +10,53 @@ <version_command><![CDATA[snippy --version]]></version_command> </xml> - <token name="@VERSION@">4.3.6</token> + <token name="@REFERENCE_SOURCE_FILE@"> + <![CDATA[ + #if $reference_source.reference_source_selector == 'history' + #if $reference_source.ref_file.is_of_type("fasta") + ln -sf '$reference_source.ref_file' 'ref.fna' && + #elif $reference_source.ref_file.is_of_type("genbank") + ln -sf '$reference_source.ref_file' 'ref.gbk' && + #end if + #elif $reference_source.reference_source_selector == 'cached' + ln -sf '$reference_source.ref_file.fields.path' 'ref.fna' && + #end if]]> + </token> + + <token name="@REFERENCE_COMMAND@"> + <![CDATA[ + #if $reference_source.reference_source_selector == 'history' + #if $reference_source.ref_file.is_of_type("fasta") + --ref 'ref.fna' + #elif $reference_source.ref_file.is_of_type("genbank") + --ref 'ref.gbk' + #end if + #elif $reference_source.reference_source_selector == 'cached' + --ref 'ref.fna' + #end if + ]]> + </token> + + <xml name="reference_selector"> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. 
If you would like to perform self-mapping, select `history` here, then choose your input file as reference."> + <option value="cached">Use a built-in genome index</option> + <option value="history">Use a genome from history and build index</option> + </param> + <when value="cached"> + <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> + <options from_data_table="all_fasta"> + <validator type="no_options" message="No reference genomes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta,genbank" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or GenBank sequence to the history and use it as reference" /> + </when> + </conditional> + </xml> + + <token name="@VERSION@">4.4.3</token> <xml name="citations"> <citations>
--- a/snippy-core.xml Thu Jul 11 09:41:13 2019 -0400 +++ b/snippy-core.xml Fri Sep 13 05:45:21 2019 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<tool id="snippy_core" name="snippy-core" version="@VERSION@"> +<tool id="snippy_core" name="snippy-core" version="@VERSION@+galaxy0"> <description> Combine multiple Snippy outputs into a core SNP alignment </description> @@ -8,19 +8,20 @@ </macros> <expand macro="requirements" /> <command detect_errors="exit_code"><![CDATA[ + @REFERENCE_SOURCE_FILE@ #for $indir in $indirs #set $sample_name = os.path.splitext(os.path.basename(str($indir.name)))[0] mkdir '$sample_name' && tar -xf '$indir' -C '$sample_name' --strip-components=1 && #end for #set snippy_dirs = " ".join(["'{0}'".format(os.path.splitext(os.path.basename(str($indir.name)))[0]) for $indir in $indirs]) snippy-core - --ref '$ref' + @REFERENCE_COMMAND@ ${snippy_dirs} ]]></command> <inputs> <param name="indirs" type="data" multiple="true" format="zip" label="Snippy input zipped dirs" help="Select all the snippy inputs for alignment" /> - <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" /> + <expand macro="reference_selector" /> <param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection"> <option value="outaln" selected="True">A core SNP alignment in the fasta format</option> <option value="outfull" selected="False">A whole genome SNP alignment (includes invariant sites)</option> @@ -48,7 +49,19 @@ <tests> <test><!-- Test #1 - test with 3 zipped directories --> <param name="indirs" value="a.tgz,b.tgz,c.tgz" /> - <param name="ref" value="reference.fasta" /> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> + <param name="outputs" value="outtxt" /> + <output 
name="alignment_summary" ftype="txt" file="a_b_c.core.txt" /> + </test> + <test><!-- Test #2 - test with 3 zipped directories and a cached (built-in) reference --> + <param name="indirs" value="a.tgz,b.tgz,c.tgz" /> + <conditional name="reference_source"> + <param name="reference_source_selector" value="cached"/> + <param name="ref_file" value="test_id"/> + </conditional> <param name="outputs" value="outtxt" /> <output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" /> </test>
--- a/snippy.xml Thu Jul 11 09:41:13 2019 -0400 +++ b/snippy.xml Fri Sep 13 05:45:21 2019 -0400 @@ -1,31 +1,22 @@ -<tool id="snippy" name="snippy" version="@VERSION@+galaxy2"> +<tool id="snippy" name="snippy" version="@VERSION@+galaxy0"> <description> Snippy finds SNPs between a haploid reference genome and your NGS sequence reads. - </description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements" /> - <expand macro="version_command" /> + </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ - #if $ref.is_of_type("fasta") - cp '$ref' 'ref.fna' && - #end if - #if $ref.is_of_type("genbank") - cp '$ref' 'ref.gbk' && - #end if + @REFERENCE_SOURCE_FILE@ + snippy --outdir 'out' --cpus \${GALAXY_SLOTS:-1} --ram \$((\${GALAXY_MEMORY_MB:-4096}/1024)) - #if $ref.is_of_type("fasta") - --ref 'ref.fna' - #end if - #if $ref.is_of_type("genbank") - --ref 'ref.gbk' - #end if + @REFERENCE_COMMAND@ --mapqual $adv.mapqual --mincov $adv.mincov --minfrac $adv.minfrac @@ -69,12 +60,10 @@ #end if - ]]></command> + ]]> </command> <inputs> - - <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" /> - + <expand macro="reference_selector" /> <conditional name="fastq_input"> <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> <option value="paired">Paired</option> @@ -115,7 +104,6 @@ <option value="outlog" selected="False">A log file with the commands run and their outputs</option> <option value="outaln" selected="False">A version of the reference but with - at position with depth=0 and N for 0 to depth to --mincov (does not have variants)</option> <option value="outcon" selected="False">A version of the reference genome with all variants 
instantiated</option> - <option value="outdep" selected="False">Output of samtools depth for the .bam file</option> <option value="outbam" selected="False">The alignments in BAM format. Note that multi-mapping and unmapped reads are not present.</option> <option value="outzip" selected="True">Zipped files needed for input into snippy-core</option> </param> @@ -145,9 +133,6 @@ <data format="fasta" name="snpconsensus" label="${tool.name} on ${on_string} consensus fasta" from_work_dir="out/snps.consensus.fa"> <filter>outputs and 'outcon' in outputs</filter> </data> - <data format="tabular" name="snpsdepth" label="${tool.name} on ${on_string} mapping depth" from_work_dir="out/snps.depth"> - <filter>outputs and 'outdep' in outputs</filter> - </data> <data format="bam" name="snpsbam" label="${tool.name} on ${on_string} mapped reads (bam)" from_work_dir="out/snps.bam"> <filter>outputs and 'outbam' in outputs</filter> </data> @@ -159,8 +144,12 @@ <tests> - <test> <!-- test 0 - fasta ref no snps --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 0 - fasta ref no snps --> + <!-- <param name="ref" value="reference.fasta" ftype="fasta" /> --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> <param name="fastq_input_selector" value="paired" /> <param name="fastq_input1" ftype="fastqsanger" value="a_1.fastq" /> <param name="fastq_input2" ftype="fastqsanger" value="a_2.fastq" /> @@ -171,8 +160,11 @@ <output name="snpgff" ftype="gff3" file="a_fna_ref_mincov_2_minqual_60.snps.gff" /> </test> - <test> <!-- test 1 - fasta ref one snp --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 1 - fasta ref one snp --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> 
<param name="fastq_input_selector" value="paired" /> <param name="fastq_input1" ftype="fastqsanger" value="b_1.fastq" /> <param name="fastq_input2" ftype="fastqsanger" value="b_2.fastq" /> @@ -183,8 +175,11 @@ <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" /> </test> - <test> <!-- test 2 - fasta ref one snp paired_collection --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 2 - fasta ref one snp paired_collection --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> <param name="fastq_input_selector" value="paired_collection" /> <param name="fastq_input"> <collection type="paired"> @@ -199,8 +194,25 @@ <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" /> </test> - <test> <!-- test 3 - fasta ref one snp single --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 3 - fasta ref one snp single --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> + <param name="fastq_input_selector" value="single" /> + <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" /> + <param name="mincov" value="2" /> + <param name="minqual" value="60" /> + <param name="outputs" value="outgff,outsum" /> + <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="6" /> + <output name="snpgff" ftype="gff3" file="b_2_fna_ref_mincov_2_minqual_60.snps.gff" /> + </test> + + <test> <!-- test 4 - reference source as cached --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="cached"/> + <param name="ref_file" value="test_id"/> + </conditional> <param name="fastq_input_selector" value="single" /> <param 
name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" /> <param name="mincov" value="2" /> @@ -247,7 +259,7 @@ For a much more in depth description of snippy and how it works, see https://github.com/tseemann/snippy - ]]></help> - <expand macro="citations"/> + ]]> </help> + <expand macro="citations"/> </tool>
--- a/test-data/a_fna_ref_mincov_2_minqual_60.snps.txt Thu Jul 11 09:41:13 2019 -0400 +++ b/test-data/a_fna_ref_mincov_2_minqual_60.snps.txt Fri Sep 13 05:45:21 2019 -0400 @@ -2,5 +2,5 @@ ReadFiles a_1.fastq a_2.fastq Reference reference.fasta ReferenceSize 700 -Software snippy 4.3.6 +Software snippy 4.4.3 VariantTotal 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Fri Sep 13 05:45:21 2019 -0400 @@ -0,0 +1,20 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +test_id test_dbkey test display name ${__HERE__}/ref.fna +
--- a/test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff Thu Jul 11 09:41:13 2019 -0400 +++ b/test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff Fri Sep 13 05:45:21 2019 -0400 @@ -1,2 +1,2 @@ ##gff-version 3 -reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:5 A:0 +reference snippy:4.4.3 variation 4 4 . . 0 note=snp A=>T T:5 A:0
--- a/test-data/b_fna_ref_mincov_2_minqual_60.snps.gff Thu Jul 11 09:41:13 2019 -0400 +++ b/test-data/b_fna_ref_mincov_2_minqual_60.snps.gff Fri Sep 13 05:45:21 2019 -0400 @@ -1,2 +1,2 @@ ##gff-version 3 -reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:10 A:0 +reference snippy:4.4.3 variation 4 4 . . 0 note=snp A=>T T:10 A:0
--- a/test-data/b_fna_ref_mincov_2_minqual_60.snps.txt Thu Jul 11 09:41:13 2019 -0400 +++ b/test-data/b_fna_ref_mincov_2_minqual_60.snps.txt Fri Sep 13 05:45:21 2019 -0400 @@ -2,6 +2,6 @@ ReadFiles b_1.fastq b_2.fastq Reference reference.fasta ReferenceSize 700 -Software snippy 4.3.6 +Software snippy 4.4.3 Variant-SNP 1 VariantTotal 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Fri Sep 13 05:45:21 2019 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Sep 13 05:45:21 2019 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Sep 13 05:45:21 2019 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> +</tables>