changeset 9:c0a6e8f595ec default tip

Add an option to set the VCF ID field value; this can be used to ID germline variants for SnpSift
author Jim Johnson <jj@umn.edu>
date Thu, 11 Apr 2013 10:28:10 -0500
parents 07cd87e94fbe
children
files README pileup_to_vcf.py pileup_to_vcf.xml
diffstat 3 files changed, 13 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/README	Thu Mar 28 14:55:50 2013 -0500
+++ b/README	Thu Apr 11 10:28:10 2013 -0500
@@ -2,4 +2,4 @@
 Filters on read coverage, base quality and the frequency of a variant.  
 The VCF info is populated with a SAF tag that give the specific frequency (0 - 1) of a variant being observed.
 This is used for filtering in the mmuff ( Missense Mutation and Frameshift Finder) workflow.
-
+The VCF ID field can optionally be set; the ID value can then be used with SnpSift annotate or SnpSift filter.
--- a/pileup_to_vcf.py	Thu Mar 28 14:55:50 2013 -0500
+++ b/pileup_to_vcf.py	Thu Apr 11 10:28:10 2013 -0500
@@ -34,7 +34,7 @@
 
 vcf_header =  """\
 ##fileformat=VCFv4.0
-##source=pileup_to_vcf.pyV1.1
+##source=pileup_to_vcf.pyV1.2
 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">
 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\">
 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\">
@@ -55,6 +55,8 @@
   parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' )
   parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' )
   parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' )
+  # value to use for the VCF ID field
+  parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' )
   # select columns
   parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' )
   parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' )
@@ -117,6 +119,8 @@
   else:
     outputFile = sys.stdout
 
+  vcf_id = options.id if options.id else "."
+
   indel_len_pattern = '([1-9][0-9]*)'
   ref_skip_pattern = '[<>]'
 
@@ -251,7 +255,6 @@
             alts.append(vcf_ref[:len(vcf_ref) - len(k)])   # TODO alt will be a substring of vcf_ref,  test this
             safs.append(saf)
       if len(alts) > 0:
-        vcf_id = "."
         vcf_qual = "." 
         vcf_filter = "PASS"
         # if not allow_multiples, report only the most freq alt
--- a/pileup_to_vcf.xml	Thu Mar 28 14:55:50 2013 -0500
+++ b/pileup_to_vcf.xml	Thu Apr 11 10:28:10 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="pileup_to_vcf" name="Pileup to VCF" version="2.1">
+<tool id="pileup_to_vcf" name="Pileup to VCF" version="2.2">
   <description>Converts a pileup to VCF with filtering</description>
   <command interpreter="python">pileup_to_vcf.py -i $input_file -o $output_file 
     #if $min_cvrg.__str__  != '':
@@ -15,6 +15,9 @@
     #end if
     $allow_multiples
     $snps_only
+    #if $vcf_id.__str__ != '':
+      --id $vcf_id
+    #end if
     #if $cols.select_order == 'yes' :
       #if $chrom_col.__str__  != '':
         --chrom_col $chrom_col 
@@ -65,6 +68,9 @@
       <option value="qual">Reads at this position taht pass the base call quality threshold</option>
       <option value="all">All reads and indels</option>
     </param>
+    <param name="vcf_id" type="text" optional="true" value="" label="The VCF ID" help="The VCF output will use this as the ID field value">
+      <validator type="regex" message="whitespace characters not allowed">^\S*$</validator>
+    </param>
   </inputs>
   <outputs>
     <data format="vcf" metadata_source="input_file" name="output_file" />