diff snippy.xml @ 0:c9a8ef2aa380 draft

planemo upload commit b288d4f48e58e291bda17c5945c281348ee072c7
author iuc
date Fri, 16 Feb 2018 13:40:16 -0500
parents
children 82f2b6f20fa2
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snippy.xml	Fri Feb 16 13:40:16 2018 -0500
@@ -0,0 +1,246 @@
+<tool id="snippy" name="snippy" version="@VERSION@">
+  <description>
+      Snippy finds SNPs between a haploid reference genome and your NGS sequence reads.
+  </description>
+  <macros>
+      <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <expand macro="version_command" />
+
+    <command detect_errors="exit_code"><![CDATA[
+
+        #if $ref.is_of_type("fasta")
+            cp '$ref' 'foo.fna' &&
+        #end if
+        #if $ref.is_of_type("genbank")
+            cp '$ref' 'foo.gbk' &&
+        #end if
+        snippy
+            --outdir 'out'
+            --cpus "\${GALAXY_SLOTS:-1}"
+            #if $ref.is_of_type("fasta")
+                --ref 'foo.fna'
+            #end if
+            #if $ref.is_of_type("genbank")
+                --ref 'foo.gbk'
+            #end if
+            --mapqual $adv.mapqual
+            --mincov $adv.mincov
+            --minfrac $adv.minfrac
+            #if $adv.rgid
+                --rgid '$advanced.rgid'
+            #end if
+            #if $adv.bwaopt
+                --bwaopt '$advanced.bwaopt'
+            #end if
+
+            #if str( $fastq_input.fastq_input_selector ) == "paired"
+                --pe1 '$fastq_input.fastq_input1'
+                --pe2 '$fastq_input.fastq_input2'
+            #end if
+            #if str( $fastq_input.fastq_input_selector ) == "paired_collection"
+                --pe1 '$fastq_input.fastq_input1.forward'
+                --pe2 '$fastq_input.fastq_input1.reverse'
+            #end if
+            #if str( $fastq_input.fastq_input_selector ) == "single"
+                --se '$fastq_input.fastq_input1'
+            #end if
+            #if str( $fastq_input.fastq_input_selector ) == "paired_iv"
+                --peil '$fastq_input.fastq_input1'
+            #end if
+
+        &&
+
+        gunzip out/snps.depth.gz
+
+        &&
+
+        #import re
+        #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input1.element_identifier)
+        mkdir -p ${dir_name}/reference && cp out/snps.tab out/snps.aligned.fa ${dir_name}/ && cp out/reference/ref.fa ${dir_name}/reference/ &&
+
+        tar -czf out.tgz ${dir_name}
+
+
+    ]]></command>
+
+    <inputs>
+
+        <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" />
+
+        <conditional name="fastq_input">
+            <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
+                <option value="paired">Paired</option>
+                <option value="single">Single</option>
+                <option value="paired_collection">Paired Collection</option>
+                <option value="paired_iv">Paired Interleaved</option>
+            </param>
+            <when value="paired">
+                <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/>
+                <param name="fastq_input2" type="data" format="fastqsanger,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+            </when>
+            <when value="single">
+                <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/>
+            </when>
+            <when value="paired_collection">
+                <param name="fastq_input1" format="fastqsanger,fasta" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+            </when>
+            <when value="paired_iv">
+                <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
+            </when>
+        </conditional>
+
+        <section name="adv" title="Advanced parameters" expanded="false">
+            <param name="mapqual" type="integer" value="60" label="Minimum mapping quality" help="Minimum mapping quality to allow" />
+            <param name="mincov" type="integer" value="10" label="Minimum coverage" help="Minimum coverage to call a snp" />
+            <param name="minfrac" type="float" value="0.9" label="Minumum proportion for variant evidence" help="Minumum proportion for variant evidence" />
+            <param name="rgid" type="text" value="" label="Bam header @RG ID" help="Use this @RG ID: in the BAM header" />
+            <param name="bwaopt" type="text" value="" label="Extra BWA MEM options" help="Extra BWA MEM options, eg. -x pacbio" />
+        </section>
+
+        <param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection">
+            <option value="outvcf" selected="True">The final annotated variants in VCF format</option>
+            <option value="outgff" selected="False">The variants in GFF3 format</option>
+            <option value="outtab" selected="True">A simple tab-separated summary of all the variants</option>
+            <option value="outsum" selected="False">A summary of the samples and mapping</option>
+            <option value="outlog" selected="False">A log file with the commands run and their outputs</option>
+            <option value="outaln" selected="False">A version of the reference but with - at position with depth=0 and N for 0 to depth to --mincov (does not have variants)</option>
+            <option value="outcon" selected="False">A version of the reference genome with all variants instantiated</option>
+            <option value="outdep" selected="False">Output of samtools depth for the .bam file</option>
+            <option value="outbam" selected="False">The alignments in BAM format. Note that multi-mapping and unmapped reads are not present.</option>
+            <option value="outzip" selected="True">Zipped files needed for input into snippy-core</option>
+        </param>
+
+    </inputs>
+
+    <outputs>
+
+        <data format="vcf" name="snpvcf" label="${tool.name} on ${on_string} snps vcf file" from_work_dir="out/snps.vcf">
+            <filter>outputs and 'outvcf' in outputs</filter>
+        </data>
+        <data format="gff3" name="snpgff" label="${tool.name} on ${on_string} snps gff file" from_work_dir="out/snps.gff">
+            <filter>outputs and 'outgff' in outputs</filter>
+        </data>
+        <data format="tabular" name="snptab" label="${tool.name} on ${on_string} snps table" from_work_dir="out/snps.tab">
+            <filter>outputs and 'outtab' in outputs</filter>
+        </data>
+        <data format="tabular" name="snpsum" label="${tool.name} on ${on_string} snps summary" from_work_dir="out/snps.txt">
+            <filter>outputs and 'outsum' in outputs</filter>
+        </data>
+        <data format="txt" name="snplog" label="${tool.name} on ${on_string} log file" from_work_dir="out/snps.log">
+            <filter>outputs and 'outlog' in outputs</filter>
+        </data>
+        <data format="fasta" name="snpalign" label="${tool.name} on ${on_string} aligned fasta" from_work_dir="out/snps.aligned.fa">
+            <filter>outputs and 'outaln' in outputs</filter>
+        </data>
+        <data format="fasta" name="snpconsensus" label="${tool.name} on ${on_string} consensus fasta" from_work_dir="out/snps.consensus.fa">
+            <filter>outputs and 'outcon' in outputs</filter>
+        </data>
+        <data format="tabular" name="snpsdepth" label="${tool.name} on ${on_string} mapping depth" from_work_dir="out/snps.depth">
+            <filter>outputs and 'outdep' in outputs</filter>
+        </data>
+        <data format="bam" name="snpsbam" label="${tool.name} on ${on_string} mapped reads (bam)" from_work_dir="out/snps.bam">
+            <filter>outputs and 'outbam' in outputs</filter>
+        </data>
+        <data format="zip" name="outdir" label="${tool.name} on ${on_string} dir for snippy core" from_work_dir="out.tgz">
+            <filter>outputs and 'outzip' in outputs</filter>
+        </data>
+
+    </outputs>
+
+    <tests>
+
+        <test> <!-- test 1 - fasta ref default -->
+            <param name="ref" value="wildtype.fna" ftype="fasta" />
+            <param name="fastq_input_selector" value="paired" />
+            <param name="fastq_input1" ftype="fastqsanger" value="mutant_R1.fastq" />
+            <param name="fastq_input2" ftype="fastqsanger" value="mutant_R2.fastq" />
+            <param name="outputs" value="outgff,outsum" />
+            <output name="snpsum" ftype="tabular" file="fna_ref/snps.txt" lines_diff="6" />
+            <output name="snpgff" ftype="gff3" file="fna_ref/snps.gff" />
+        </test>
+
+        <test> <!-- test 2 - gbk ref default -->
+            <param name="ref" value="wildtype.gbk" ftype="genbank" />
+            <param name="fastq_input_selector" value="paired" />
+            <param name="fastq_input1" ftype="fastqsanger" value="mutant_R1.fastq" />
+            <param name="fastq_input2" ftype="fastqsanger" value="mutant_R2.fastq" />
+            <param name="outputs" value="outgff,outsum" />
+            <output name="snpsum" ftype="tabular" file="gbk_ref/snps.txt" lines_diff="6" />
+            <output name="snpgff" ftype="gff3" file="gbk_ref/snps.gff" />
+        </test>
+
+        <test> <!-- test 3 - gbk mapqual=40 -->
+            <param name="ref" value="wildtype.gbk" ftype="genbank" />
+            <param name="fastq_input_selector" value="paired" />
+            <param name="fastq_input1" ftype="fastqsanger" value="mutant_R1.fastq" />
+            <param name="fastq_input2" ftype="fastqsanger" value="mutant_R2.fastq" />
+            <param name="outputs" value="outgff,outsum" />
+            <param name="mapqual" value="40" />
+            <output name="snpsum" ftype="tabular" file="map_qual/snps.txt" lines_diff="6" />
+            <output name="snpgff" ftype="gff3" file="map_qual/snps.gff" />
+        </test>
+
+        <test> <!-- test 4 - gbk mincov=15 -->
+            <param name="ref" value="wildtype.gbk" ftype="genbank" />
+            <param name="fastq_input_selector" value="paired" />
+            <param name="fastq_input1" ftype="fastqsanger" value="mutant_R1.fastq" />
+            <param name="fastq_input2" ftype="fastqsanger" value="mutant_R2.fastq" />
+            <param name="mincov" value="15" />
+            <param name="outputs" value="outgff,outsum" />
+            <output name="snpsum" ftype="tabular" file="min_cov/snps.txt" lines_diff="6" />
+            <output name="snpgff" ftype="gff3" file="min_cov/snps.gff" />
+        </test>
+
+        <test> <!-- test 5 - gbk minfrac=0.7 -->
+            <param name="ref" value="wildtype.gbk" ftype="genbank" />
+            <param name="fastq_input_selector" value="paired" />
+            <param name="fastq_input1" ftype="fastqsanger" value="mutant_R1.fastq" />
+            <param name="fastq_input2" ftype="fastqsanger" value="mutant_R2.fastq" />
+            <param name="minfrac" value="0.7" />
+            <param name="outputs" value="outgff,outsum" />
+            <output name="snpsum" ftype="tabular" file="min_frac/snps.txt" lines_diff="6" />
+            <output name="snpgff" ftype="gff3" file="min_frac/snps.gff" />
+        </test>
+
+    </tests>
+
+
+    <help><![CDATA[
+
+**Snippy @VERSION@**
+
+    Snippy finds SNPs between a haploid reference genome and your NGS sequence reads. It will find both substitutions (snps) and insertions/deletions (indels).
+
+**Author**
+
+    Torsten Seemann
+
+**Inputs**
+
+    - NGS Reads in fastq format (single or paired end)
+    - Reference file in either fasta or genbank format
+
+If the reference file is supplied in genbank format, snpeff will be called to determine the effect of any snps found.
+
+**Advanced options**
+
+    - mapping quality - Integer -  Minimum mapping quality to allow (default '60')
+
+    - minimum coverage - Integer - Minimum coverage of variant site (default '10')
+
+    - minimum fraction - Float - Minumum proportion for variant evidence (default '0.9')
+
+    - rgid - String -         Use this @RG ID: in the BAM header (default '')
+
+    - bwaopt - Extra BWA MEM options, eg. -x pacbio (default '')
+
+**Further information**
+
+    For a much more in depth description of snippy and how it works, see https://github.com/tseemann/snippy
+
+    ]]></help>
+  <expand macro="citations"/>
+
+</tool>