changeset 4:6d2a5f0859cf draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 9df9b52baf62b70fbcfc3fbe965d7197d4e8738e
author iuc
date Tue, 31 Jan 2017 12:41:31 -0500
parents 5337db17a5f7
children 0a564427739d
files bcftools_call.xml macros.xml
diffstat 2 files changed, 53 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/bcftools_call.xml	Mon Oct 03 12:19:38 2016 -0400
+++ b/bcftools_call.xml	Tue Jan 31 12:41:31 2017 -0500
@@ -1,9 +1,32 @@
 <?xml version='1.0' encoding='utf-8'?>
-<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@.1">
+<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@.2">
     <description>SNP/indel variant calling from VCF/BCF</description>
     <macros>
         <token name="@EXECUTABLE@">call</token>
         <import>macros.xml</import>
+        <xml name="macro_novel_rate"><!-- components of the bcftools novel-rate option, in the order bcftools expects: SNP, deletion, insertion -->
+            <param name="novel_rate_snp" type="float" min="0" label="Novel Rate SNP" value="" optional="true"
+                   help="(--novel-rate, 1st value) likelihood of a novel SNP mutation for constrained trio calling, see man page for details" />
+            <param name="novel_rate_del" type="float" min="0" label="Novel Rate Deletions" value="" optional="true"
+                   help="(--novel-rate, 2nd value) likelihood of a novel deletion for constrained trio calling. Values are passed positionally, so also set Novel Rate SNP; see man page for details" />
+            <param name="novel_rate_ins" type="float" min="0" label="Novel Rate Insertions" value="" optional="true"
+                   help="(--novel-rate, 3rd value) likelihood of a novel insertion for constrained trio calling. Values are passed positionally, so also set Novel Rate SNP and Novel Rate Deletions; see man page for details" />
+        </xml>
+        <token name="@NOVEL_RATE@">
+            ## --novel-rate is positional (SNP[,deletion[,insertion]]): emit the values in that order and
+            ## stop at the first unset one, so a later rate can never shift into an earlier slot.
+            #set $novel_rate = []
+            #set $nr_params = $section.genotypes
+            #for $rate in ($nr_params.novel_rate_snp, $nr_params.novel_rate_del, $nr_params.novel_rate_ins):
+              #if not str($rate):
+                #break
+              #end if
+              #silent $novel_rate.append(str($rate))
+            #end for
+            #if len($novel_rate) > 0:
+               --novel-rate '#echo ','.join($novel_rate)#'
+            #end if
+        </token>
     </macros>
     <expand macro="requirements" />
     <expand macro="version_command" />
@@ -34,9 +57,7 @@
  #else
    #if $section.genotypes.constrain == 'trio':
     --constrain trio
-    #if $section.genotypes.novel_rate:
-     --novel-rate '$section.genotypes.novel_rate'
-    #end if
+    @NOVEL_RATE@
    #end if
    #set $section = $sec_consensus_variant_calling.variant_calling.genotypes
    @TARGETS@
@@ -114,7 +135,7 @@
                         </when>
                         <when value="trio">
                             <expand macro="macro_targets" />
-                            <param name="novel_rate" type="float" label="Novel Rate" value="1e-8,1e-9,1e-9" optional="true" help="likelihood of novel mutation for constrained trio calling, see man page for details" />
+                            <expand macro="macro_novel_rate" />
                         </when>
                     </conditional>
                     <param name="gvcf" type="integer" label="gvcf" optional="True" help="group non-variant sites into gVCF blocks by minimum per-sample DP" />
@@ -128,7 +149,7 @@
                         <when value="none">
                         </when>
                         <when value="trio">
-                            <param name="novel_rate" type="float" label="Novel Rate" value="1e-8,1e-9,1e-9" optional="true" help="likelihood of novel mutation for constrained trio calling, see man page for details" />
+                            <expand macro="macro_novel_rate" />
                         </when>
                     </conditional>
                     <expand macro="macro_targets" />
@@ -227,6 +248,8 @@
   - Some of the original functionality has been temporarily lost in the process of transition to htslib, but will be added back on popular demand. 
   - The original calling model can be invoked with the -c option.
 
+The novel-rate option sets the likelihood of a novel mutation for constrained (-C) trio calling. Trio genotype calling maximizes the likelihood of a particular combination of genotypes for father, mother and child: P(F=i,M=j,C=k) = P(unconstrained) * Pn + P(constrained) * (1-Pn). If three values are given, the mutation rate Pn is set explicitly for SNPs, deletions and insertions, respectively. If two values are given, the first is interpreted as the mutation rate of SNPs and the second is used to calculate the mutation rate of indels according to their length as Pn=float*exp(-a-b*len), where a=22.8689, b=0.2994 for insertions and a=21.9313, b=0.2856 for deletions [pubmed:23975140]. If only one value is given, the same mutation rate Pn is used for SNPs and indels.
+
 @REGIONS_HELP@
 @TARGETS_HELP@
 
--- a/macros.xml	Mon Oct 03 12:19:38 2016 -0400
+++ b/macros.xml	Tue Jan 31 12:41:31 2017 -0500
@@ -14,14 +14,16 @@
       <requirement type="package" version="1.3">bcftools</requirement>
       <!-- conda dependency -->
       <requirement type="package" version="1.3">htslib</requirement>
+      <!-- htslib provides tabix and bgzip
       <requirement type="package" version="0.2.6">tabix</requirement>
+      -->
       <requirement type="package" version="1.2">samtools</requirement>
     </requirements>
   </xml>
   <xml name="version_command">
     <version_command>bcftools 2&gt;&amp;1 | grep 'Version:'</version_command>
   </xml>
-  
+
   <xml name="citations">
     <citations>
       <citation type="doi">10.1093/bioinformatics/btp352</citation>
@@ -45,20 +47,20 @@
 <![CDATA[
 ## May need to symlink input if there is an associated
 #set $input_vcf = 'input.vcf.gz'
-#if $input_file.datatype.file_ext == 'vcf'
+#if $input_file.is_of_type('vcf')
   bgzip -c "$input_file" > $input_vcf &&
   bcftools index $input_vcf &&
-#elif $input_file.datatype.file_ext == 'vcf_bgzip'
+#elif $input_file.is_of_type('vcf_bgzip')
   ln -s "$input_file" $input_vcf
-#elif $input_file.datatype.file_ext == 'bcf'
+#elif $input_file.is_of_type('bcf')
   #set $input_vcf = 'input.bcf'
-  ln -s "$input_file" $input_vcf && 
+  ln -s "$input_file" $input_vcf &&
   #if $input_file.metadata.bcf_index:
     ln -s $input_file.metadata.bcf_index ${input_vcf}.csi &&
-  #else 
+  #else
     bcftools index $input_vcf &&
   #end if
-#elif $input_file.datatype.file_ext == 'bcf_bgzip'
+#elif $input_file.is_of_type('bcf_bgzip')
   ln -s "$input_file" $input_vcf
 #end if
 ]]>
@@ -77,21 +79,21 @@
 #set $vcfs_list_file = 'vcfs_list'
 #for (i,input_file) in enumerate($input_files):
   #set $input_vcf = 'input' + str($i) + '.vcf.gz'
-  #if $input_file.datatype.file_ext == 'vcf'
+  #if $input_file.is_of_type('vcf')
     bgzip -c "$input_file" > $input_vcf &&
     bcftools index $input_vcf &&
-  #elif $input_file.datatype.file_ext == 'vcf_bgz'
+  #elif $input_file.is_of_type('vcf_bgz')
     ln -s "$input_file" $input_vcf
-  #elif $input_file.datatype.file_ext == 'bcf'
+  #elif $input_file.is_of_type('bcf')
     #set $input_vcf = 'input' + str($i) + '.bcf.gz'
     ## bgzip -c "$input_file" > $input_vcf &&
     ln -s "$input_file" $input_vcf &&
     #if $input_file.metadata.bcf_index:
       ln -s $input_file.metadata.bcf_index ${input_vcf}.csi &&
-    #else 
+    #else
       bcftools index $input_vcf &&
     #end if
-  #elif $input_file.datatype.file_ext == 'bcfvcf_bgz'
+  #elif $input_file.is_of_type('bcfvcf_bgz')
     ln -s "$input_file" $input_vcf &&
   #end if
   echo '$input_vcf' >> $vcfs_list_file &&
@@ -225,7 +227,7 @@
   </token>
 
   <xml name="macro_apply_filters">
-    <param name="apply_filters" type="text" value="" label="Apply Filters" optional="true" 
+    <param name="apply_filters" type="text" value="" label="Apply Filters" optional="true"
            help="(-f --apply-filters) Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
       <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator>
     </param>
@@ -272,7 +274,7 @@
         </param>
         <when value="__none__"/>
         <when value="regions">
-            <param name="regions" type="text" value="" label="restrict to comma-separated list of regions" optional="true" 
+            <param name="regions" type="text" value="" label="restrict to comma-separated list of regions" optional="true"
                    help="Each region is specifed as: chr or chr:pos or chr:from-to">
                  <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
             </param>
@@ -299,7 +301,7 @@
   <token name="@PREPARE_TARGETS_FILE@">
 <![CDATA[
 #set $targets_path = None
-#if 'targets' in $section 
+#if 'targets' in $section
   #if $section.targets.targets_src == 'targets_file':
     #set $targets_path = 'targets_file.tab.gz'
     bgzip -c "$section.targets.targets_file" > $targets_path &&
@@ -331,7 +333,7 @@
         </param>
         <when value="__none__"/>
         <when value="targets">
-            <param name="targets" type="text" value="" label="Restrict to comma-separated list of targets" optional="true" 
+            <param name="targets" type="text" value="" label="Restrict to comma-separated list of targets" optional="true"
                    help="Each target is specifed as: chr or chr:pos or chr:from-to">
                  <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
             </param>
@@ -358,15 +360,15 @@
   </token>
 
   <xml name="macro_samples">
-      <param name="samples" type="text" value="" label="Samples" optional="true" 
+      <param name="samples" type="text" value="" label="Samples" optional="true"
              help="(-s) comma separated list of samples to annotate (or exclude with &quot;^&quot; prefix)">
           <validator type="regex" message="">^(\w+(,\w+)*)?$</validator>
       </param>
-      <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples" 
+      <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples"
              help="inverts the query/filtering applied by Samples" />
-      <param name="samples_file" type="data" format="tabular" label="Samples File" optional="True" 
+      <param name="samples_file" type="data" format="tabular" label="Samples File" optional="True"
              help="(-S) file of samples to include" />
-      <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples File" 
+      <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples File"
              help="inverts the query/filtering applied by Samples File" />
   </xml>
   <token name="@SAMPLES@">
@@ -416,7 +418,7 @@
   </token>
 
   <xml name="macro_columns">
-    <param name="columns" type="text" value="" label="Columns" optional="true" 
+    <param name="columns" type="text" value="" label="Columns" optional="true"
             help="list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
         <validator type="regex" message="COLUMN names  separated by commas">^([^,]+(,[^,]+)*)?$</validator>
     </param>
@@ -443,7 +445,7 @@
 Output Type
 -----------
 
-Output compressed BCF (b), or uncompressed VCF (v). 
+Output compressed BCF (b), or uncompressed VCF (v).
 Use the BCF option when piping between bcftools subcommands to speed up
 performance by removing unecessary compression/decompression
 and VCF<->BCF conversion.