Repository 'vcfgandalftools'
hg clone https://toolshed.g2.bx.psu.edu/repos/urgi-team/vcfgandalftools

Changeset 2:6bebeb76fa8d (2016-04-05)
Previous changeset 1:cfd4eaadad42 (2015-12-15) Next changeset 3:1fd1f727c330 (2016-04-08)
Commit message:
Uploaded
added:
VCFGandalfTools/VCFCarto_wrapper.py
VCFGandalfTools/VCFCarto_wrapper.xml
VCFGandalfTools/VCFFiltering_wrapper.py
VCFGandalfTools/VCFFiltering_wrapper.xml
VCFGandalfTools/VCFStorage_wrapper.py
VCFGandalfTools/VCFStorage_wrapper.xml
VCFGandalfTools/test-data/Expchr17.tab
VCFGandalfTools/test-data/VCFCarto_input.tab
VCFGandalfTools/test-data/VCFCarto_output.tab
VCFGandalfTools/test-data/VCFCarto_output_merged.bed
VCFGandalfTools/test-data/VCFCarto_output_merged.tab
VCFGandalfTools/test-data/VCFFiltering_DP_4_200_output.vcf
VCFGandalfTools/test-data/VCFFiltering_DPauto_output.vcf
VCFGandalfTools/test-data/VCFFiltering_input.vcf
VCFGandalfTools/test-data/chr17.VCF
VCFGandalfTools/test-data/chr17.fa
VCFGandalfTools/tool_dependencies.xml
removed:
VCFCarto_wrapper.py
VCFCarto_wrapper.xml
VCFFiltering_wrapper.py
VCFFiltering_wrapper.xml
VCFStorage_wrapper.py
VCFStorage_wrapper.xml
test-data/Expchr17.tab
test-data/VCFCarto_input.tab
test-data/VCFCarto_output.tab
test-data/VCFCarto_output_merged.bed
test-data/VCFCarto_output_merged.tab
test-data/VCFFiltering_DP_4_200_output.vcf
test-data/VCFFiltering_DPauto_output.vcf
test-data/VCFFiltering_input.vcf
test-data/chr17.VCF
test-data/chr17.fa
tool_dependencies.xml
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFCarto_wrapper.py
--- a/VCFCarto_wrapper.py Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,107 +0,0 @@
-#!/usr/bin/env python
-
-
-import subprocess, tempfile, sys, os, glob, shutil, time
-from optparse import OptionParser
-
-
-class VCFCartoWrapper(object):
-
-    def __init__(self):
-        self._options = None
-        
-        
-    def stop_err(self, msg ):
-        sys.stderr.write( "%s\n" % msg )
-        sys.exit()
-        
-        
-    def setAttributesFromCmdLine(self):
-        description = "VCFcarto can convert your tabulated file into a file with only the SNP from refA and refH.\n"
-        description += "2 formats are possible, either the input format is conserved, or the format is changed into a 3 letter format\n"
-        description += "(\"A\" for refA, \"H\" for refH and \"-\" when the base do not correspond to any parent)\n\n"
-        description += "example 1 : VCFcarto.py -f Storage.out -A G15 -H G23 -o FilteredStorage.out\n"
-        description += "example 2 : VCFcarto.py -f Storage.out -A ref1 -H ref2 -p -s -g -m -o cartoTable.out\n"
-        parser = OptionParser(description = description, version = "0.1") 
-        parser.add_option("-f", "--file",      dest = "tableName", action = "store", type = "string", help = "Input TSV File name [compulsory] [format: TSV]",                                                                                                    default = "")
-        parser.add_option("-o", "--output",    dest = "outFile",   action = "store", type = "string", help = "output TSV File name [compulsory] [format: TSV]",                                                                                                   default = "")
-        parser.add_option("-A", "--refA",      dest = "refA",      action = "store", type = "string", help = "name of the reference genome A [compulsory] ",                                                                                                      default = "")
-        parser.add_option("-H", "--refH",      dest = "refH",      action = "store", type = "string", help = "name of the reference genome H [compulsory] ",                                                                                                      default = "")
-        parser.add_option("-p", "--onlyParents",   dest = "onlyPar",   action = "store_true",         help = "Will change every letters by either A or H depending on which parents the strain correspond to for that base[optional] [default: False]",           default = False)
-        parser.add_option("-m", "--mergeMarkers",   dest = "mergeMarkers",   action = "store_true",   help = "Will merge sequential markers with the same information ; option -p is needed [optional] [default: False]",                                         default = False)
-        parser.add_option("-M", "--mergeFile", dest = "mergeFile", action = "store", type = "string", help = "name for the mergeFile ",                                                                                                                           default = "")
-        parser.add_option("-g", "--graphics",  dest = "graphs",    action = "store_true",             help = "create graphs. Only works with -p[optional] [default: False]",                                                                                      default = False)
-        parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", help = "name of the HTML linking to graphs ",                                                                                                              default = "")
-        parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", help = "name of the folder containing graphs ",                                                                                                              default = "")
-        options = parser.parse_args()[0]
-        self._setAttributesFromOptions(options)
-
-
-    def _setAttributesFromOptions(self, options):
-        self._options = options
-
-    def run(self):
-        prg = "VCFCarto.py"
-        args = ""
-        args += "-f %s" % self._options.tableName
-        args += " "
-        args += "-o %s" % self._options.outFile
-        args += " "
-        args += "-A %s" % self._options.refA
-        args += " "
-        args += "-H %s" % self._options.refH
-        args += " "
-        args += "-v 2 "
-        if self._options.onlyPar :
-            args += " "
-            args += "-p"
-            if self._options.mergeMarkers :
-                args += " "
-                args += "-m"
-            if self._options.graphs :
-                args += " "
-                args += "-g"
-        cmd = "%s %s" %(prg, args)
-        
-        print cmd
-        
-        try:
-            tmp_err = tempfile.NamedTemporaryFile().name
-            tmp_stderr = open( tmp_err, 'wb' )
-            proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp_err, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if stderr:
-                raise Exception, stderr
-        except Exception, e:
-            self.stop_err( 'Error in VCFCarto:\n' + str( e ) ) 
-        
-        if self._options.mergeMarkers :
-            shutil.move("markerList.bed" ,self._options.mergeFile)
-        if self._options.graphs :
-            html = open(self._options.graphHTML, "w")
-            
-            os.mkdir(self._options.dirGraphs)
-            lGraphsFiles = glob.glob("VCFCarto_graphs/*")
-            for file in lGraphsFiles :
-                baseName = os.path.basename(file)
-                shutil.move( file ,"%s/%s" %(self._options.dirGraphs, baseName))
-                line = "<img src=\"%s\" > \n" %(baseName)
-                html.write(line)
-                
-if __name__ == "__main__":
-    iWrapper = VCFCartoWrapper()
-    iWrapper.setAttributesFromCmdLine()
-    iWrapper.run()    
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFCarto_wrapper.xml
--- a/VCFCarto_wrapper.xml Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,315 +0,0 @@\n-<tool id="VCFCarto" name="VCFCarto" version="0.01">\n-    <description>VCFcarto can convert a tabulated marker file into a file with only the markers from 2 parents </description>\n-    <requirements>\n-        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>\n-    </requirements>\n-    <version_command>\n-  VCFCarto.py --version\n-    </version_command>\n-    <command interpreter="python">\n-    VCFCarto_wrapper.py -f $inputTabular -o $outputVCFCarto -A $parentA -H $parentH\n-    #if str($outputType) == "carto"\n-     -p -g --graphHTML $output_html --dirGraphs "$output_html.files_path"\n-    #end if\n-    #if str($outputType) == "MergedCarto"\n-     -p -g --graphHTML $output_html --dirGraphs "$output_html.files_path" -m --mergeFile $output_bed\n-    #end if\n-    </command>\n-    <inputs>\n-        <param name="inputTabular" type="data" format="tabular" label="indicate your tabulated marker file"/>\n-        <param name="parentA" size="20" type="text" value="V1" label="indicate parent 1 name (A)"/>\n-        <param name="parentH" size="20" type="text" value="V2" label="indicate parent 2 name (H)"/>\n-        <param name="outputType" type="select" display="radio" label="select type of output" multiple="False">\n-            <option value="raw" >7 caracter code</option>\n-            <option value="carto" >A - H code</option>\n-            <option value="MergedCarto" >A - H code and merge</option>\n-        </param>\n-    </inputs>\n-    <outputs>\n-        <data format="tabular" name="outputVCFCarto" label="${tool.name} on ${on_string} (tabular)"/>\n-        <data format="html" name="output_html" label="${tool.name} graphs on ${on_string} (html)">\n-            <filter>not outputType == "raw"</filter>\n-        </data>\n-        <data format="bed" name="output_bed" label="${tool.name} markers on ${on_string} (bed)">\n-            <filter>outputType == "MergedCarto"</filter>\n-        </data>\n-    </outputs>\n-    
<tests>\n-        <test>\n-            <param name="inputTabular" value="VCFCarto_input.tab"/>\n-            <param name="parentA" value="REF1"/>\n-            <param name="parentH" value="REF2"/>\n-            <param name="outputType" value="raw"/>\n-            <output name="outputVCFCarto" file="VCFCarto_output.tab" ftype="tabular"/>\n-        </test>\n-        <test>\n-            <param name="inputTabular" value="VCFCarto_input.tab"/>\n-            <param name="parentA" value="REF1"/>\n-            <param name="parentH" value="REF2"/>\n-            <param name="outputType" value="MergedCarto"/>\n-            <output name="outputVCFCarto" file="VCFCarto_output_merged.tab" ftype="tabular"/>\n-            <output name="output_bed" file="VCFCarto_output_merged.bed" ftype="bed"/>\n-        </test>\n-    </tests>\n-    <help><![CDATA[\n-      \n-**VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents**\n-\n-.. class:: infomark\n-\n-expected input format is the output from VCFStorage.\n-\n------\n-\n-**what it does :**\n-\n-VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents, refA and refH. \n-\n-2 formats are possible, either the input format is conserved, or the format is changed into a 3 letter format\n-\n------\n-\n-**input format :**\n-\n-.. 
class:: infomark\n-\n-expected input format is the output from VCFStorage.\n-\n-the expected format is a tab delimited format file where all genomic positions are in rows, and all strains are in columns\n-\n-For each position and each genome, a code is attributed : \n-\n-- for the reference : ::\n-\n-    A,T,G,C for the corresponding nucleotidic acid \n-\n-- for the genomes : ::\n-\n-    U if the position was not refered in the VCF file \n-    R if the base is similar to the reference \n-    F if the base has been filtered out\n-    A,T,G,C if the genome has a validated SNP at the position\n-\n------\n-\n-**output format :**\n-\n-for the main output, 2 formats are possible : \n-\n-- The first format is similar to the input format (same columns and code) '..b'U\tR\tR\tR\tU\tR\tR\tR\tR\tR\tR\tR\n-\tChr3\t3\tT\tF\tR\tR\tR\tR\tR\tU\tR\tR\tR\tR\tR\tR\tR\n-\tChr3\t4\tT\tR\tR\tF\tR\tR\tR\tU\tR\tR\tR\tR\tR\tR\tR\n-\n-\n-output :\n-\n-- without A - H code : ::\n-\n-\tCHROM\tPOS\treference\tREF1\tG01\tREF2\tG02\tG03\tG04\tG05\tG06\tG07\tG08\tG09\tG10\tG11\tG12\n-\tChr1\t7\tA\tG\tC\tC\tC\tF\tC\tC\tC\tC\tC\tG\tC\tG\tG\n-\tChr1\t9\tC\tR\tT\tT\tR\tT\tT\tT\tU\tR\tT\tR\tT\tT\tT\n-\tChr1\t13\tA\tR\tR\tG\tG\tR\tF\tR\tF\tG\tR\tG\tR\tR\tF\n-\tChr1\t16\tG\tA\tR\tR\tA\tR\tR\tU\tF\tR\tR\tA\tA\tR\tA\n-\tChr1\t17\tA\tR\tG\tG\tR\tU\tR\tR\tG\tG\tR\tG\tU\tR\tG\n-\tChr1\t19\tG\tC\tU\tR\tC\tR\tC\tU\tR\tR\tC\tC\tC\tR\tC\n-\tChr1\t20\tG\tA\tU\tR\tA\tR\tA\tU\tR\tR\tA\tA\tA\tR\tA\n-\tChr1\t21\tG\tT\tU\tR\tT\tR\tT\tU\tR\tR\tT\tT\tT\tR\tT\n-\tChr1\t22\tA\tT\tU\tR\tT\tR\tT\tU\tR\tR\tT\tT\tT\tR\tT\n-\tChr1\t23\tC\tT\tT\tR\tT\tR\tR\tR\tT\tR\tU\tT\tR\tT\tT\n-\tChr1\t26\tT\tR\tR\tC\tC\tC\tC\tC\tR\tR\tC\tR\tC\tR\tU\n-\tChr1\t27\tC\tR\tR\tG\tG\tG\tG\tR\tG\tR\tG\tR\tG\tR\tR\n-\tChr1\t28\tC\tG\tT\tT\tT\tG\tG\tT\tT\tF\tT\tG\tT\tT\tG\n-\tChr1\t29\tG\tT\tR\tR\tR\tR\tT\tR\tT\tR\tT\tT\tR\tT\tR\n-\tChr1\t32\tA\tG\tG\tR\tG\tG\tG\tR\tR\tG\tG\tG\tG\tG\tR\n-\tChr2\t2\tA\tC\tR\tR\tC\tC\tU\tR\tR\tR\tR\tC\tC\tC\tU\n-\tChr2
\t7\tA\tT\tF\tR\tU\tR\tT\tT\tT\tR\tT\tT\tF\tT\tT\n-\tChr2\t10\tG\tR\tT\tT\tT\tT\tR\tT\tR\tR\tR\tR\tR\tU\tR\n-\tChr2\t11\tC\tR\tA\tA\tA\tA\tR\tA\tR\tR\tR\tR\tR\tU\tR\n-\tChr2\t12\tA\tR\tT\tT\tT\tT\tR\tT\tR\tR\tR\tR\tR\tU\tR\n-\tChr2\t13\tT\tR\tC\tC\tC\tC\tR\tC\tR\tR\tR\tR\tR\tU\tR\n-\tChr2\t14\tC\tT\tA\tA\tT\tA\tT\tA\tT\tA\tT\tT\tA\tA\tA\n-\tChr2\t21\tG\tA\tR\tR\tA\tA\tA\tR\tR\tR\tA\tA\tR\tR\tR\n-\tChr2\t23\tA\tR\tR\tT\tT\tR\tR\tT\tT\tT\tT\tT\tR\tR\tR\n-\tChr2\t25\tT\tR\tA\tA\tR\tR\tA\tR\tA\tR\tR\tA\tR\tR\tA\n-\tChr2\t30\tT\tA\tA\tG\tA\tG\tG\tA\tA\tG\tF\tG\tG\tG\tU\n-\n-- with A - H code but no markers : ::\n-\n-\tCHROM\tPOS\treference\tREF1\tG01\tREF2\tG02\tG03\tG04\tG05\tG06\tG07\tG08\tG09\tG10\tG11\tG12\n-\tChr1\t7\t-\tA\tH\tH\tH\t-\tH\tH\tH\tH\tH\tA\tH\tA\tA\n-\tChr1\t9\t-\tA\tH\tH\tA\tH\tH\tH\t-\tA\tH\tA\tH\tH\tH\n-\tChr1\t13\t-\tA\tA\tH\tH\tA\t-\tA\t-\tH\tA\tH\tA\tA\t-\n-\tChr1\t16\t-\tA\tH\tH\tA\tH\tH\t-\t-\tH\tH\tA\tA\tH\tA\n-\tChr1\t17\t-\tA\tH\tH\tA\t-\tA\tA\tH\tH\tA\tH\t-\tA\tH\n-\tChr1\t19\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n-\tChr1\t20\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n-\tChr1\t21\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n-\tChr1\t22\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n-\tChr1\t23\t-\tA\tA\tH\tA\tH\tH\tH\tA\tH\t-\tA\tH\tA\tA\n-\tChr1\t26\t-\tA\tA\tH\tH\tH\tH\tH\tA\tA\tH\tA\tH\tA\t-\n-\tChr1\t27\t-\tA\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\n-\tChr1\t28\t-\tA\tH\tH\tH\tA\tA\tH\tH\t-\tH\tA\tH\tH\tA\n-\tChr1\t29\t-\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tA\tH\tA\tH\n-\tChr1\t32\t-\tA\tA\tH\tA\tA\tA\tH\tH\tA\tA\tA\tA\tA\tH\n-\tChr2\t2\t-\tA\tH\tH\tA\tA\t-\tH\tH\tH\tH\tA\tA\tA\t-\n-\tChr2\t7\t-\tA\t-\tH\t-\tH\tA\tA\tA\tH\tA\tA\t-\tA\tA\n-\tChr2\t10\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n-\tChr2\t11\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n-\tChr2\t12\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n-\tChr2\t13\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n-\tChr2\t14\t-\tA\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\tH\tH\tH\n-\tChr2\t21\
t-\tA\tH\tH\tA\tA\tA\tH\tH\tH\tA\tA\tH\tH\tH\n-\tChr2\t23\t-\tA\tA\tH\tH\tA\tA\tH\tH\tH\tH\tH\tA\tA\tA\n-\tChr2\t25\t-\tA\tH\tH\tA\tA\tH\tA\tH\tA\tA\tH\tA\tA\tH\n-\tChr2\t30\t-\tA\tA\tH\tA\tH\tH\tA\tA\tH\t-\tH\tH\tH\t-\n-\n-- with A - H code and merge  : \n- \n- - tab file : ::\n- \n-\tCHROM\tPOS\treference\tREF1\tG01\tREF2\tG02\tG03\tG04\tG05\tG06\tG07\tG08\tG09\tG10\tG11\tG12\n-\tChr1\t*M_00001\t-\tA\tH\tH\tH\t-\tH\tH\tH\tH\tH\tA\tH\tA\tA\n-\tChr1\t*M_00002\t-\tA\tH\tH\tA\tH\tH\tH\t-\tA\tH\tA\tH\tH\tH\n-\tChr1\t*M_00003\t-\tA\tA\tH\tH\tA\t-\tA\t-\tH\tA\tH\tA\tA\t-\n-\tChr1\t*M_00004\t-\tA\tH\tH\tA\tH\tH\t-\t-\tH\tH\tA\tA\tH\tA\n-\tChr1\t*M_00005\t-\tA\tH\tH\tA\t-\tA\tA\tH\tH\tA\tH\t-\tA\tH\n-\tChr1\t*M_00006\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n-\tChr1\t*M_00007\t-\tA\tA\tH\tA\tH\tH\tH\tA\tH\t-\tA\tH\tA\tA\n-\tChr1\t*M_00008\t-\tA\tA\tH\tH\tH\tH\tH\tA\tA\tH\tA\tH\tA\t-\n-\tChr1\t*M_00009\t-\tA\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\n-\tChr1\t*M_00010\t-\tA\tH\tH\tH\tA\tA\tH\tH\t-\tH\tA\tH\tH\tA\n-\tChr1\t*M_00011\t-\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tA\tH\tA\tH\n-\tChr1\t*M_00012\t-\tA\tA\tH\tA\tA\tA\tH\tH\tA\tA\tA\tA\tA\tH\n-\tChr2\t*M_00013\t-\tA\tH\tH\tA\tA\t-\tH\tH\tH\tH\tA\tA\tA\t-\n-\tChr2\t*M_00014\t-\tA\t-\tH\t-\tH\tA\tA\tA\tH\tA\tA\t-\tA\tA\n-\tChr2\t*M_00015\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n-\tChr2\t*M_00016\t-\tA\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\tH\tH\tH\n-\tChr2\t*M_00017\t-\tA\tH\tH\tA\tA\tA\tH\tH\tH\tA\tA\tH\tH\tH\n-\tChr2\t*M_00018\t-\tA\tA\tH\tH\tA\tA\tH\tH\tH\tH\tH\tA\tA\tA\n-\tChr2\t*M_00019\t-\tA\tH\tH\tA\tA\tH\tA\tH\tA\tA\tH\tA\tA\tH\n-\tChr2\t*M_00020\t-\tA\tA\tH\tA\tH\tH\tA\tA\tH\t-\tH\tH\tH\t-\n-\n- - bed file : :: \n-    
\n-\tChr1\t7\t7\t*M_00001\n-\tChr1\t9\t9\t*M_00002\n-\tChr1\t13\t13\t*M_00003\n-\tChr1\t16\t16\t*M_00004\n-\tChr1\t17\t17\t*M_00005\n-\tChr1\t19\t22\t*M_00006\n-\tChr1\t23\t23\t*M_00007\n-\tChr1\t26\t26\t*M_00008\n-\tChr1\t27\t27\t*M_00009\n-\tChr1\t28\t28\t*M_00010\n-\tChr1\t29\t29\t*M_00011\n-\tChr1\t32\t32\t*M_00012\n-\tChr2\t2\t2\t*M_00013\n-\tChr2\t7\t7\t*M_00014\n-\tChr2\t10\t13\t*M_00015\n-\tChr2\t14\t14\t*M_00016\n-\tChr2\t21\t21\t*M_00017\n-\tChr2\t23\t23\t*M_00018\n-\tChr2\t25\t25\t*M_00019\n-\tChr2\t30\t30\t*M_00020\n-\n-\n------\n-\n-**reference :**\n-\n-]]>\n-    </help>\n-</tool>\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFFiltering_wrapper.py
--- a/VCFFiltering_wrapper.py Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-
-
-import subprocess, tempfile, sys, os, glob, shutil, time
-from optparse import OptionParser
-from optparse import Option, OptionValueError
-
-class VCFFilteringWrapper(object):
-
- def __init__(self):
- self._options = None
-
-
- def stop_err(self, msg ):
- sys.stderr.write( "%s\n" % msg )
- sys.exit()
-
-
- def setAttributesFromCmdLine(self):
- description = "VCFFiltering_wrapper"
- description += "\nWrapper for VCFFiltering ;\n VCFFiltering filters SNP on a VCF depending on depth (DP) allele number (AN), allele frequency (AF) and SNP quality.\n"
- description += "example 1 : VCFFiltering.py -f myVCF.vcf -o FilteredVCF.vcf\n"
- description += "example 2 : VCFFiltering.py -f myVCF.vcf -N 2 -F 0.87 -b bed1.bed bed2.bed -o FilteredVCF.vcf\n"
- parser = OptionParser(description = description, version = "0.1")
- parser.add_option("-f", "--vcf",    dest = "VCFFile",   action = "store", type = "string", help = "Input VCF File name [compulsory] [format: VCF]",                                           default = "")
- parser.add_option("-o", "--output", dest = "outFile",   action = "store", type = "string", help = "output VCF File name [compulsory] [format: VCF]",                                             default = "")
- parser.add_option("-m", "--minDP",  dest = "minDP",  action = "store", type = "int", help = "minimum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ",     default = 0)
- parser.add_option("-M", "--maxDP",  dest = "maxDP",  action = "store", type = "int", help = "maximum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ",     default = 0)
- parser.add_option("-N", "--AN", dest = "AN", action = "store", type = "int", help = "maximum number of allele for a SNP; default = 2",                                                default = 2)
- parser.add_option("-F", "--AF", dest = "AF", action = "store", type = "float",  help = "minimum frequency for the alternative allele of a SNP; default = 0.9",                        default = 0.9)
- parser.add_option("-b", "--bed",    dest = "bedFiles",  action = "append", type = "string", help = "bed files: list of coordinates to filter, multiple arguments allowed '-b file1 file2' ",  default = [])
- parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", help = "name of the HTML linking to graphs ",                                                      default = "")
- parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", help = "name of the folder containing graphs ",                                                    default = "")
- options = parser.parse_args()[0]
- self._setAttributesFromOptions(options)
-
-
- def _setAttributesFromOptions(self, options):
- self._options = options
-
- def run(self):
- if self._options.minDP and self._options.maxDP :
- if self._options.minDP > self._options.maxDP :
- self.stop_err( 'error in options : minDP > max DP (%s > %s)' %(self._options.minDP,self._options.maxDP)) 
-
- prg = "VCFFiltering.py -g -G 'png' "
- args = ""
- args += "-f %s" % self._options.VCFFile
- args += " "
- args += "-o %s" % self._options.outFile
- if self._options.AF :
- args += " "
- args += "-F %s" % self._options.AF
- if self._options.AN :
- args += " "
- args += "-N %s" % self._options.AN
- if self._options.minDP :
- args += " "
- args += "-m %s" % self._options.minDP
- if self._options.maxDP :
- args += " "
- args += "-M %s" % self._options.maxDP
- if self._options.bedFiles :
- if self._options.bedFiles == "":
- pass
- else :
- self._lBedFiles = self._options.bedFiles
- cmd = "%s %s" %(prg, args)
-
- print cmd
-
- try:
- tmp_err = tempfile.NamedTemporaryFile().name
- tmp_stderr = open( tmp_err, 'wb' )
- proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr )
- returncode = proc.wait()
- tmp_stderr.close()
- # get stderr, allowing for case where it's very large
- tmp_stderr = open( tmp_err, 'rb' )
- stderr = ''
- buffsize = 1048576
- try:
- while True:
- stderr += tmp_stderr.read( buffsize )
- if not stderr or len( stderr ) % buffsize != 0:
- break
- except OverflowError:
- pass
- tmp_stderr.close()
- if stderr:
- raise Exception, stderr
- except Exception, e:
- self.stop_err( 'Error in VCFFiltering:\n' + str( e ) ) 
-
- if True :
- html = open(self._options.graphHTML, "w")
-
- os.mkdir(self._options.dirGraphs)
- lGraphsFiles = glob.glob("VCFFiltering_graphs/*")
- for file in lGraphsFiles :
- baseName = os.path.basename(file)
- shutil.move( file ,"%s/%s" %(self._options.dirGraphs, baseName))
- line = "<img src=\"%s\" > \n" %(baseName)
- html.write(line)
-
-
-if __name__ == "__main__":
- iWrapper = VCFFilteringWrapper()
- iWrapper.setAttributesFromCmdLine()
- iWrapper.run()
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFFiltering_wrapper.xml
--- a/VCFFiltering_wrapper.xml Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,157 +0,0 @@
-<tool id="VCFFiltering" name="VCFFiltering" version="0.01">
-    <description>Filters SNP on a VCF depending on depth, allele number and allele frequency</description>
-    <requirements>
-        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
-    </requirements>
-    <version_command>
-  VCFFiltering.py --version
-    </version_command>
-    <command interpreter="python">
- #if $DP_auto.is_DP_auto
-    VCFFiltering_wrapper.py -f $inputVCF -o $outputVCF -F $AF -N $AN
-    #else
-    VCFFiltering_wrapper.py -f $inputVCF -o $outputVCF -F $AF -N $AN -m $DP_auto.DPmin -M $DP_auto.DPmax
- #end if
- ## Bed files
- #if $BedFile_List.is_BedFile
- #for $bed in $BedFile_List.BedFiles
-  -b $bed.inputBed
- #end for
- #end if
- --graphHTML $output_html --dirGraphs "$output_html.files_path"
-
-    </command>
-    <inputs>
-        <param name="inputVCF" type="data" format="vcf" label="Input VCF File name (from FreeBayes)"/>
-        <conditional name="DP_auto">
-            <param name="is_DP_auto" type="boolean" label="Calculate optimal depth range automatically" truevalue="yes" falsevalue="no" checked="on" />
-            <when value="yes"/>
-            <when value="no">
-                <param name="DPmin" type="integer" label="minumum Depth" value="1" help="default = 1">
-                    <validator type="in_range" min="0" message="DP can't be negative" />
-                </param>
-                <param name="DPmax" type="integer" label="maximum Depth" value="200" help="default = 200">
-                    <validator type="in_range" min="0" message="DP can't be negative"  />
-                </param>
-            </when>
-        </conditional>
-        <param name="AF" type="float" value="0.9" label="minimum allele frequency" help="default = 0.9">
-            <validator type="in_range" min="0.0" max="1.0"/>
-        </param>
-        <param name="AN" type="integer" value="2" label="maximum allele number" help="default = 2">
-            <validator type="in_range" min="1" message="Allele number can't be negative" />
-        </param>
-        <conditional name="BedFile_List">
-            <param name="is_BedFile" type="boolean" label="bed files : list of coordinates to filter, multiple beds allowed" truevalue="yes" falsevalue="no" checked="off" />
-            <when value="no"/>
-            <when value="yes">
-                <repeat name="BedFiles" title="bed files : list of coordinates to filter, multiple beds allowed" min="1">
-                    <param name="inputBed" type="data" format="bed" label="Select Bed file "/>
-                </repeat>
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data format="vcf" name="outputVCF" label="${tool.name} on ${on_string} (vcf)"/>
-        <data format="html" name="output_html" label="${tool.name} graphs on ${on_string} (html)">
-        </data>
-    </outputs>
-    <tests>
-        <test>
-            <param name="is_DP_auto" value="yes" />
-            <param name="AF" value="0.9"/>
-            <param name="AN" value="2"/>
-            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf" />
-            <output name="outputVCF" file="VCFFiltering_DPauto_output.vcf"/>
-        </test>
-        <test>
-            <param name="is_DP_auto" value="no" />
-            <param name="DPmin" value="4"/>
-            <param name="DPmax" value="200"/>
-            <param name="AF" value="0.9"/>
-            <param name="AN" value="2"/>
-            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf" />
-            <output name="outputVCF" file="VCFFiltering_DP_4_200_output.vcf"/>
-        </test>
-    </tests>
-    <help><![CDATA[
-**Filters SNP on a VCF depending on depth, allele number and allele frequency**
-
------
-
-**what it does :**
-
-VCFFiltering is a python script that allows to filter SNP results from freebayes on multiple criterias as once. The filters are : 
-
- - Allele number : number of possible allele at the genomic position
- - Allele frequency : frenquency of the most represented allele ; note that if the most represented allele is the reference (a "." in the 4th column of the VCF, the allele frequency will still work but allele frequency should be under 1-x)
- - Depth : Higher and lower bound of the depth ; the depth is the number of reads mapped on the genomic positions.
-
-Depth can be automatically detected. If you do so, The 90 % of the positions with a depth closest to the most frequent depth will pass the filter. 
-
-This script has been developped to be used with freebayes output, on haploïd data. 
-
-
-.. class:: infomark
-
-the VCF source is detected from the header. Please keep the header of your VCF file if you want to use this tool
-
------
-
-**input and output formats :**
-
-input format is a VCF file obtaines with freebayes ; headers are necessary
-you can also add some bed files to filter some specific regions.
-
-output format is a filtered VCF file.
-
------
-
-**example :**
-
-
-VCF input file: ::
-
-    ##fileformat=VCFv4.1
-    ##fileDate=20150126
-    ##source=freeBayes v0.9.13-2-ga830efd
-    ##reference=ref.fsa
-    ##phasing=none
-    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
-    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
-    chrom1 1 . T . . . DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
-    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
-    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
-    chrom1 4 . G T . . DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
-    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0
-
-purposed options: ::
-
- - Calculate optimal depth range automatically = no
- - minumum Depth = 5
- - maximum Depth = 14
- - minimum allele frequency = 0.9
- - maximum allele number = 2
-
-exemple result : ::
-
-    ##fileformat=VCFv4.1
-    ##fileDate=20150126
-    ##source=freeBayes v0.9.13-2-ga830efd
-    ##reference=ref.fsa
-    ##phasing=none
-    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
-    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
-    chrom1 1 . T . . G_DP DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177;G_AN=0;G_AF=0.00;G_DP=4;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
-    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177;G_AN=0;G_AF=0.08;G_DP=12;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
-    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=1.00;G_DP=5;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
-    chrom1 4 . G T . G_AF DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=0.83;G_DP=6;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
-    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177;G_AN=0;G_AF=0.92;G_DP=12;G_Base=C GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0
-
------
-
-**reference :**
-
-]]>
-    </help>
-</tool>
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/VCFCarto_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFCarto_wrapper.py Tue Apr 05 08:33:41 2016 -0400
[
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+
+import subprocess, tempfile, sys, os, glob, shutil, time
+from optparse import OptionParser
+
+
+class VCFCartoWrapper(object):
+    """Command-line wrapper that forwards its options to VCFCarto.py.
+
+    Once VCFCarto.py has finished, the wrapper moves the files found in the
+    current directory ("markerList.bed" and "VCFCarto_graphs/*") to the
+    locations given by --mergeFile, --graphHTML and --dirGraphs.
+    """
+
+    def __init__(self):
+        # optparse values object, filled in by setAttributesFromCmdLine()
+        self._options = None
+
+    def stop_err(self, msg):
+        """Write msg to stderr and terminate with a non-zero exit status."""
+        sys.stderr.write("%s\n" % msg)
+        # a bare sys.exit() would report success (status 0) to the caller
+        sys.exit(1)
+
+    def setAttributesFromCmdLine(self):
+        """Parse sys.argv with optparse and store the resulting options."""
+        description = "VCFcarto can convert your tabulated file into a file with only the SNP from refA and refH.\n"
+        description += "2 formats are possible, either the input format is conserved, or the format is changed into a 3 letter format\n"
+        description += "(\"A\" for refA, \"H\" for refH and \"-\" when the base do not correspond to any parent)\n\n"
+        description += "example 1 : VCFcarto.py -f Storage.out -A G15 -H G23 -o FilteredStorage.out\n"
+        description += "example 2 : VCFcarto.py -f Storage.out -A ref1 -H ref2 -p -s -g -m -o cartoTable.out\n"
+        parser = OptionParser(description = description, version = "0.2")
+        parser.add_option("-f", "--file", dest = "tableName", action = "store", type = "string", default = "",
+                          help = "Input TSV File name [compulsory] [format: TSV]")
+        parser.add_option("-o", "--output", dest = "outFile", action = "store", type = "string", default = "",
+                          help = "output TSV File name [compulsory] [format: TSV]")
+        parser.add_option("-A", "--refA", dest = "refA", action = "store", type = "string", default = "",
+                          help = "name of the reference genome A [compulsory] ")
+        parser.add_option("-H", "--refH", dest = "refH", action = "store", type = "string", default = "",
+                          help = "name of the reference genome H [compulsory] ")
+        parser.add_option("-p", "--onlyParents", dest = "onlyPar", action = "store_true", default = False,
+                          help = "Will change every letters by either A or H depending on which parents the strain correspond to for that base[optional] [default: False]")
+        parser.add_option("-m", "--mergeMarkers", dest = "mergeMarkers", action = "store_true", default = False,
+                          help = "Will merge sequential markers with the same information ; option -p is needed [optional] [default: False]")
+        parser.add_option("-M", "--mergeFile", dest = "mergeFile", action = "store", type = "string", default = "",
+                          help = "name for the mergeFile ")
+        parser.add_option("-g", "--graphics", dest = "graphs", action = "store_true", default = False,
+                          help = "create graphs. Only works with -p[optional] [default: False]")
+        parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", default = "",
+                          help = "name of the HTML linking to graphs ")
+        parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", default = "",
+                          help = "name of the folder containing graphs ")
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+
+    def _setAttributesFromOptions(self, options):
+        """Store an already-parsed options object on the instance."""
+        self._options = options
+
+    def _buildArgv(self):
+        """Return the VCFCarto.py invocation as an argument list.
+
+        -m and -g are only forwarded together with -p, as in the option help.
+        """
+        opts = self._options
+        argv = ["VCFCarto.py",
+                "-f", opts.tableName,
+                "-o", opts.outFile,
+                "-A", opts.refA,
+                "-H", opts.refH,
+                "-v", "2"]
+        if opts.onlyPar:
+            argv.append("-p")
+            if opts.mergeMarkers:
+                argv.append("-m")
+            if opts.graphs:
+                argv.append("-g")
+        return argv
+
+    def _runCommand(self, argv):
+        """Run argv without a shell and return (returncode, stderr text)."""
+        # The anonymous temporary file stays open for the whole call, so its
+        # name cannot be reused by another process, and it is removed on close.
+        with tempfile.TemporaryFile(mode = "w+") as errFile:
+            proc = subprocess.Popen(args = argv, shell = False, cwd = ".", stderr = errFile)
+            returncode = proc.wait()
+            errFile.seek(0)
+            # a single read() has no chunk-boundary case that could loop forever
+            return returncode, errFile.read()
+
+    def _collectGraphs(self):
+        """Move the generated graphs to dirGraphs and list them in graphHTML."""
+        opts = self._options
+        if not os.path.isdir(opts.dirGraphs):
+            os.mkdir(opts.dirGraphs)
+        with open(opts.graphHTML, "w") as html:
+            # sorted so that the HTML page is the same from one run to the next
+            for graphPath in sorted(glob.glob("VCFCarto_graphs/*")):
+                baseName = os.path.basename(graphPath)
+                shutil.move(graphPath, os.path.join(opts.dirGraphs, baseName))
+                html.write("<img src=\"%s\" > \n" % (baseName))
+
+    def run(self):
+        """Run VCFCarto.py, then move its side outputs to their destinations.
+
+        Exits through stop_err() when the program cannot be started, writes
+        anything to stderr, or returns a non-zero status.
+        """
+        opts = self._options
+        argv = self._buildArgv()
+        print(" ".join(argv))
+
+        try:
+            returncode, stderr = self._runCommand(argv)
+        except (OSError, ValueError) as e:
+            # e.g. VCFCarto.py is not on the PATH
+            self.stop_err('Error in VCFCarto:\n' + str(e))
+        if stderr:
+            self.stop_err('Error in VCFCarto:\n' + stderr)
+        if returncode != 0:
+            self.stop_err('Error in VCFCarto:\nVCFCarto.py exited with status %d' % returncode)
+
+        # only collect outputs whose flag was actually forwarded (-p required)
+        if opts.onlyPar and opts.mergeMarkers:
+            shutil.move("markerList.bed", opts.mergeFile)
+        if opts.onlyPar and opts.graphs:
+            self._collectGraphs()
+                
+if __name__ == "__main__":
+    # Script entry point: read the command line, then run the wrapped tool.
+    wrapper = VCFCartoWrapper()
+    wrapper.setAttributesFromCmdLine()
+    wrapper.run()
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/VCFCarto_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFCarto_wrapper.xml Tue Apr 05 08:33:41 2016 -0400
[
b'@@ -0,0 +1,315 @@\n+<tool id="VCFCarto" name="VCFCarto" version="0.01">\n+    <description>VCFcarto can convert a tabulated marker file into a file with only the markers from 2 parents </description>\n+    <requirements>\n+        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>\n+    </requirements>\n+    <version_command>\n+  VCFCarto.py --version\n+    </version_command>\n+    <command interpreter="python">\n+    VCFCarto_wrapper.py -f $inputTabular -o $outputVCFCarto -A $parentA -H $parentH\n+    #if str($outputType) == "carto"\n+     -p -g --graphHTML $output_html --dirGraphs "$output_html.files_path"\n+    #end if\n+    #if str($outputType) == "MergedCarto"\n+     -p -g --graphHTML $output_html --dirGraphs "$output_html.files_path" -m --mergeFile $output_bed\n+    #end if\n+    </command>\n+    <inputs>\n+        <param name="inputTabular" type="data" format="tabular" label="indicate your tabulated marker file"/>\n+        <param name="parentA" size="20" type="text" value="V1" label="indicate parent 1 name (A)"/>\n+        <param name="parentH" size="20" type="text" value="V2" label="indicate parent 2 name (H)"/>\n+        <param name="outputType" type="select" display="radio" label="select type of output" multiple="False">\n+            <option value="raw" >7 caracter code</option>\n+            <option value="carto" >A - H code</option>\n+            <option value="MergedCarto" >A - H code and merge</option>\n+        </param>\n+    </inputs>\n+    <outputs>\n+        <data format="tabular" name="outputVCFCarto" label="${tool.name} on ${on_string} (tabular)"/>\n+        <data format="html" name="output_html" label="${tool.name} graphs on ${on_string} (html)">\n+            <filter>not outputType == "raw"</filter>\n+        </data>\n+        <data format="bed" name="output_bed" label="${tool.name} markers on ${on_string} (bed)">\n+            <filter>outputType == "MergedCarto"</filter>\n+        </data>\n+    </outputs>\n+    
<tests>\n+        <test>\n+            <param name="inputTabular" value="VCFCarto_input.tab"/>\n+            <param name="parentA" value="REF1"/>\n+            <param name="parentH" value="REF2"/>\n+            <param name="outputType" value="raw"/>\n+            <output name="outputVCFCarto" file="VCFCarto_output.tab" ftype="tabular"/>\n+        </test>\n+        <test>\n+            <param name="inputTabular" value="VCFCarto_input.tab"/>\n+            <param name="parentA" value="REF1"/>\n+            <param name="parentH" value="REF2"/>\n+            <param name="outputType" value="MergedCarto"/>\n+            <output name="outputVCFCarto" file="VCFCarto_output_merged.tab" ftype="tabular"/>\n+            <output name="output_bed" file="VCFCarto_output_merged.bed" ftype="bed"/>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+      \n+**VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents**\n+\n+.. class:: infomark\n+\n+expected input format is the output from VCFStorage.\n+\n+-----\n+\n+**what it does :**\n+\n+VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents, refA and refH. \n+\n+2 formats are possible, either the input format is conserved, or the format is changed into a 3 letter format\n+\n+-----\n+\n+**input format :**\n+\n+.. 
class:: infomark\n+\n+expected input format is the output from VCFStorage.\n+\n+the expected format is a tab delimited format file where all genomic positions are in rows, and all strains are in columns\n+\n+For each position and each genome, a code is attributed : \n+\n+- for the reference : ::\n+\n+    A,T,G,C for the corresponding nucleotidic acid \n+\n+- for the genomes : ::\n+\n+    U if the position was not refered in the VCF file \n+    R if the base is similar to the reference \n+    F if the base has been filtered out\n+    A,T,G,C if the genome has a validated SNP at the position\n+\n+-----\n+\n+**output format :**\n+\n+for the main output, 2 formats are possible : \n+\n+- The first format is similar to the input format (same columns and code) '..b'U\tR\tR\tR\tU\tR\tR\tR\tR\tR\tR\tR\n+\tChr3\t3\tT\tF\tR\tR\tR\tR\tR\tU\tR\tR\tR\tR\tR\tR\tR\n+\tChr3\t4\tT\tR\tR\tF\tR\tR\tR\tU\tR\tR\tR\tR\tR\tR\tR\n+\n+\n+output :\n+\n+- without A - H code : ::\n+\n+\tCHROM\tPOS\treference\tREF1\tG01\tREF2\tG02\tG03\tG04\tG05\tG06\tG07\tG08\tG09\tG10\tG11\tG12\n+\tChr1\t7\tA\tG\tC\tC\tC\tF\tC\tC\tC\tC\tC\tG\tC\tG\tG\n+\tChr1\t9\tC\tR\tT\tT\tR\tT\tT\tT\tU\tR\tT\tR\tT\tT\tT\n+\tChr1\t13\tA\tR\tR\tG\tG\tR\tF\tR\tF\tG\tR\tG\tR\tR\tF\n+\tChr1\t16\tG\tA\tR\tR\tA\tR\tR\tU\tF\tR\tR\tA\tA\tR\tA\n+\tChr1\t17\tA\tR\tG\tG\tR\tU\tR\tR\tG\tG\tR\tG\tU\tR\tG\n+\tChr1\t19\tG\tC\tU\tR\tC\tR\tC\tU\tR\tR\tC\tC\tC\tR\tC\n+\tChr1\t20\tG\tA\tU\tR\tA\tR\tA\tU\tR\tR\tA\tA\tA\tR\tA\n+\tChr1\t21\tG\tT\tU\tR\tT\tR\tT\tU\tR\tR\tT\tT\tT\tR\tT\n+\tChr1\t22\tA\tT\tU\tR\tT\tR\tT\tU\tR\tR\tT\tT\tT\tR\tT\n+\tChr1\t23\tC\tT\tT\tR\tT\tR\tR\tR\tT\tR\tU\tT\tR\tT\tT\n+\tChr1\t26\tT\tR\tR\tC\tC\tC\tC\tC\tR\tR\tC\tR\tC\tR\tU\n+\tChr1\t27\tC\tR\tR\tG\tG\tG\tG\tR\tG\tR\tG\tR\tG\tR\tR\n+\tChr1\t28\tC\tG\tT\tT\tT\tG\tG\tT\tT\tF\tT\tG\tT\tT\tG\n+\tChr1\t29\tG\tT\tR\tR\tR\tR\tT\tR\tT\tR\tT\tT\tR\tT\tR\n+\tChr1\t32\tA\tG\tG\tR\tG\tG\tG\tR\tR\tG\tG\tG\tG\tG\tR\n+\tChr2\t2\tA\tC\tR\tR\tC\tC\tU\tR\tR\tR\tR\tC\tC\tC\tU\n+\tChr2
\t7\tA\tT\tF\tR\tU\tR\tT\tT\tT\tR\tT\tT\tF\tT\tT\n+\tChr2\t10\tG\tR\tT\tT\tT\tT\tR\tT\tR\tR\tR\tR\tR\tU\tR\n+\tChr2\t11\tC\tR\tA\tA\tA\tA\tR\tA\tR\tR\tR\tR\tR\tU\tR\n+\tChr2\t12\tA\tR\tT\tT\tT\tT\tR\tT\tR\tR\tR\tR\tR\tU\tR\n+\tChr2\t13\tT\tR\tC\tC\tC\tC\tR\tC\tR\tR\tR\tR\tR\tU\tR\n+\tChr2\t14\tC\tT\tA\tA\tT\tA\tT\tA\tT\tA\tT\tT\tA\tA\tA\n+\tChr2\t21\tG\tA\tR\tR\tA\tA\tA\tR\tR\tR\tA\tA\tR\tR\tR\n+\tChr2\t23\tA\tR\tR\tT\tT\tR\tR\tT\tT\tT\tT\tT\tR\tR\tR\n+\tChr2\t25\tT\tR\tA\tA\tR\tR\tA\tR\tA\tR\tR\tA\tR\tR\tA\n+\tChr2\t30\tT\tA\tA\tG\tA\tG\tG\tA\tA\tG\tF\tG\tG\tG\tU\n+\n+- with A - H code but no markers : ::\n+\n+\tCHROM\tPOS\treference\tREF1\tG01\tREF2\tG02\tG03\tG04\tG05\tG06\tG07\tG08\tG09\tG10\tG11\tG12\n+\tChr1\t7\t-\tA\tH\tH\tH\t-\tH\tH\tH\tH\tH\tA\tH\tA\tA\n+\tChr1\t9\t-\tA\tH\tH\tA\tH\tH\tH\t-\tA\tH\tA\tH\tH\tH\n+\tChr1\t13\t-\tA\tA\tH\tH\tA\t-\tA\t-\tH\tA\tH\tA\tA\t-\n+\tChr1\t16\t-\tA\tH\tH\tA\tH\tH\t-\t-\tH\tH\tA\tA\tH\tA\n+\tChr1\t17\t-\tA\tH\tH\tA\t-\tA\tA\tH\tH\tA\tH\t-\tA\tH\n+\tChr1\t19\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n+\tChr1\t20\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n+\tChr1\t21\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n+\tChr1\t22\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n+\tChr1\t23\t-\tA\tA\tH\tA\tH\tH\tH\tA\tH\t-\tA\tH\tA\tA\n+\tChr1\t26\t-\tA\tA\tH\tH\tH\tH\tH\tA\tA\tH\tA\tH\tA\t-\n+\tChr1\t27\t-\tA\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\n+\tChr1\t28\t-\tA\tH\tH\tH\tA\tA\tH\tH\t-\tH\tA\tH\tH\tA\n+\tChr1\t29\t-\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tA\tH\tA\tH\n+\tChr1\t32\t-\tA\tA\tH\tA\tA\tA\tH\tH\tA\tA\tA\tA\tA\tH\n+\tChr2\t2\t-\tA\tH\tH\tA\tA\t-\tH\tH\tH\tH\tA\tA\tA\t-\n+\tChr2\t7\t-\tA\t-\tH\t-\tH\tA\tA\tA\tH\tA\tA\t-\tA\tA\n+\tChr2\t10\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n+\tChr2\t11\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n+\tChr2\t12\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n+\tChr2\t13\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n+\tChr2\t14\t-\tA\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\tH\tH\tH\n+\tChr2\t21\
t-\tA\tH\tH\tA\tA\tA\tH\tH\tH\tA\tA\tH\tH\tH\n+\tChr2\t23\t-\tA\tA\tH\tH\tA\tA\tH\tH\tH\tH\tH\tA\tA\tA\n+\tChr2\t25\t-\tA\tH\tH\tA\tA\tH\tA\tH\tA\tA\tH\tA\tA\tH\n+\tChr2\t30\t-\tA\tA\tH\tA\tH\tH\tA\tA\tH\t-\tH\tH\tH\t-\n+\n+- with A - H code and merge  : \n+ \n+ - tab file : ::\n+ \n+\tCHROM\tPOS\treference\tREF1\tG01\tREF2\tG02\tG03\tG04\tG05\tG06\tG07\tG08\tG09\tG10\tG11\tG12\n+\tChr1\t*M_00001\t-\tA\tH\tH\tH\t-\tH\tH\tH\tH\tH\tA\tH\tA\tA\n+\tChr1\t*M_00002\t-\tA\tH\tH\tA\tH\tH\tH\t-\tA\tH\tA\tH\tH\tH\n+\tChr1\t*M_00003\t-\tA\tA\tH\tH\tA\t-\tA\t-\tH\tA\tH\tA\tA\t-\n+\tChr1\t*M_00004\t-\tA\tH\tH\tA\tH\tH\t-\t-\tH\tH\tA\tA\tH\tA\n+\tChr1\t*M_00005\t-\tA\tH\tH\tA\t-\tA\tA\tH\tH\tA\tH\t-\tA\tH\n+\tChr1\t*M_00006\t-\tA\t-\tH\tA\tH\tA\t-\tH\tH\tA\tA\tA\tH\tA\n+\tChr1\t*M_00007\t-\tA\tA\tH\tA\tH\tH\tH\tA\tH\t-\tA\tH\tA\tA\n+\tChr1\t*M_00008\t-\tA\tA\tH\tH\tH\tH\tH\tA\tA\tH\tA\tH\tA\t-\n+\tChr1\t*M_00009\t-\tA\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\n+\tChr1\t*M_00010\t-\tA\tH\tH\tH\tA\tA\tH\tH\t-\tH\tA\tH\tH\tA\n+\tChr1\t*M_00011\t-\tA\tH\tH\tH\tH\tA\tH\tA\tH\tA\tA\tH\tA\tH\n+\tChr1\t*M_00012\t-\tA\tA\tH\tA\tA\tA\tH\tH\tA\tA\tA\tA\tA\tH\n+\tChr2\t*M_00013\t-\tA\tH\tH\tA\tA\t-\tH\tH\tH\tH\tA\tA\tA\t-\n+\tChr2\t*M_00014\t-\tA\t-\tH\t-\tH\tA\tA\tA\tH\tA\tA\t-\tA\tA\n+\tChr2\t*M_00015\t-\tA\tH\tH\tH\tH\tA\tH\tA\tA\tA\tA\tA\t-\tA\n+\tChr2\t*M_00016\t-\tA\tH\tH\tA\tH\tA\tH\tA\tH\tA\tA\tH\tH\tH\n+\tChr2\t*M_00017\t-\tA\tH\tH\tA\tA\tA\tH\tH\tH\tA\tA\tH\tH\tH\n+\tChr2\t*M_00018\t-\tA\tA\tH\tH\tA\tA\tH\tH\tH\tH\tH\tA\tA\tA\n+\tChr2\t*M_00019\t-\tA\tH\tH\tA\tA\tH\tA\tH\tA\tA\tH\tA\tA\tH\n+\tChr2\t*M_00020\t-\tA\tA\tH\tA\tH\tH\tA\tA\tH\t-\tH\tH\tH\t-\n+\n+ - bed file : :: \n+    
\n+\tChr1\t7\t7\t*M_00001\n+\tChr1\t9\t9\t*M_00002\n+\tChr1\t13\t13\t*M_00003\n+\tChr1\t16\t16\t*M_00004\n+\tChr1\t17\t17\t*M_00005\n+\tChr1\t19\t22\t*M_00006\n+\tChr1\t23\t23\t*M_00007\n+\tChr1\t26\t26\t*M_00008\n+\tChr1\t27\t27\t*M_00009\n+\tChr1\t28\t28\t*M_00010\n+\tChr1\t29\t29\t*M_00011\n+\tChr1\t32\t32\t*M_00012\n+\tChr2\t2\t2\t*M_00013\n+\tChr2\t7\t7\t*M_00014\n+\tChr2\t10\t13\t*M_00015\n+\tChr2\t14\t14\t*M_00016\n+\tChr2\t21\t21\t*M_00017\n+\tChr2\t23\t23\t*M_00018\n+\tChr2\t25\t25\t*M_00019\n+\tChr2\t30\t30\t*M_00020\n+\n+\n+-----\n+\n+**reference :**\n+\n+]]>\n+    </help>\n+</tool>\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/VCFFiltering_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFFiltering_wrapper.py Tue Apr 05 08:33:41 2016 -0400
[
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+
+import subprocess, tempfile, sys, os, glob, shutil, time
+from optparse import OptionParser
+from optparse import Option, OptionValueError
+
+class VCFFilteringWrapper(object):
+    """Command-line wrapper that forwards its options to VCFFiltering.py.
+
+    VCFFiltering.py is always called with "-g -G png"; after the run, the
+    files found in "VCFFiltering_graphs/" are moved to --dirGraphs and
+    listed in the --graphHTML page.
+    """
+
+    def __init__(self):
+        # optparse values object, filled in by setAttributesFromCmdLine()
+        self._options = None
+
+    def stop_err(self, msg):
+        """Write msg to stderr and terminate with a non-zero exit status."""
+        sys.stderr.write("%s\n" % msg)
+        # a bare sys.exit() would report success (status 0) to the caller
+        sys.exit(1)
+
+    def setAttributesFromCmdLine(self):
+        """Parse sys.argv with optparse and store the resulting options."""
+        description = "VCFFiltering_wrapper"
+        description += "\nWrapper for VCFFiltering ;\n VCFFiltering filters SNP on a VCF depending on depth (DP) allele number (AN), allele frequency (AF) and SNP quality.\n"
+        description += "example 1 : VCFFiltering.py -f myVCF.vcf -o FilteredVCF.vcf\n"
+        description += "example 2 : VCFFiltering.py -f myVCF.vcf -N 2 -F 0.87 -b bed1.bed bed2.bed -o FilteredVCF.vcf\n"
+        parser = OptionParser(description = description, version = "0.2")
+        parser.add_option("-f", "--vcf", dest = "VCFFile", action = "store", type = "string", default = "",
+                          help = "Input VCF File name [compulsory] [format: VCF]")
+        parser.add_option("-o", "--output", dest = "outFile", action = "store", type = "string", default = "",
+                          help = "output VCF File name [compulsory] [format: VCF]")
+        parser.add_option("-m", "--minDP", dest = "minDP", action = "store", type = "int", default = 0,
+                          help = "minimum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ")
+        parser.add_option("-M", "--maxDP", dest = "maxDP", action = "store", type = "int", default = 0,
+                          help = "maximum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ")
+        parser.add_option("-N", "--AN", dest = "AN", action = "store", type = "int", default = 2,
+                          help = "maximum number of allele for a SNP; default = 2")
+        parser.add_option("-F", "--AF", dest = "AF", action = "store", type = "float", default = 0.9,
+                          help = "minimum frequency for the alternative allele of a SNP; default = 0.9")
+        parser.add_option("-b", "--bed", dest = "bedFiles", action = "append", type = "string", default = [],
+                          help = "bed files: list of coordinates to filter, multiple arguments allowed '-b file1 file2' ")
+        parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", default = "",
+                          help = "name of the HTML linking to graphs ")
+        parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", default = "",
+                          help = "name of the folder containing graphs ")
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+
+    def _setAttributesFromOptions(self, options):
+        """Store an already-parsed options object on the instance."""
+        self._options = options
+
+    def _buildArgv(self):
+        """Return the VCFFiltering.py invocation as an argument list."""
+        opts = self._options
+        argv = ["VCFFiltering.py", "-g", "-G", "png",
+                "-f", opts.VCFFile,
+                "-o", opts.outFile]
+        # AF is tested against None, not for truthiness: 0.0 is a valid
+        # threshold and must not silently fall back to the 0.9 default.
+        if opts.AF is not None:
+            argv += ["-F", str(opts.AF)]
+        if opts.AN:
+            argv += ["-N", str(opts.AN)]
+        # for the depth bounds, 0 is this wrapper's "not set" value
+        if opts.minDP:
+            argv += ["-m", str(opts.minDP)]
+        if opts.maxDP:
+            argv += ["-M", str(opts.maxDP)]
+        for bedfile in opts.bedFiles:
+            argv += ["-b", bedfile]
+        return argv
+
+    def _runCommand(self, argv):
+        """Run argv without a shell and return (returncode, stderr text)."""
+        # The anonymous temporary file stays open for the whole call, so its
+        # name cannot be reused by another process, and it is removed on close.
+        with tempfile.TemporaryFile(mode = "w+") as errFile:
+            proc = subprocess.Popen(args = argv, shell = False, cwd = ".", stderr = errFile)
+            returncode = proc.wait()
+            errFile.seek(0)
+            # a single read() has no chunk-boundary case that could loop forever
+            return returncode, errFile.read()
+
+    def _collectGraphs(self):
+        """Move the generated graphs to dirGraphs and list them in graphHTML."""
+        opts = self._options
+        if not os.path.isdir(opts.dirGraphs):
+            os.mkdir(opts.dirGraphs)
+        with open(opts.graphHTML, "w") as html:
+            # sorted so that the HTML page is the same from one run to the next
+            for graphPath in sorted(glob.glob("VCFFiltering_graphs/*")):
+                baseName = os.path.basename(graphPath)
+                shutil.move(graphPath, os.path.join(opts.dirGraphs, baseName))
+                html.write("<img src=\"%s\" > \n" % (baseName))
+
+    def run(self):
+        """Check the depth bounds, run VCFFiltering.py and collect the graphs.
+
+        Exits through stop_err() when minDP > maxDP, when the program cannot
+        be started, writes anything to stderr, or returns a non-zero status.
+        """
+        opts = self._options
+        if opts.minDP and opts.maxDP and opts.minDP > opts.maxDP:
+            self.stop_err('error in options : minDP > max DP (%s > %s)' % (opts.minDP, opts.maxDP))
+
+        argv = self._buildArgv()
+        print(" ".join(argv))
+
+        try:
+            returncode, stderr = self._runCommand(argv)
+        except (OSError, ValueError) as e:
+            # e.g. VCFFiltering.py is not on the PATH
+            self.stop_err('Error in VCFFiltering:\n' + str(e))
+        if stderr:
+            self.stop_err('Error in VCFFiltering:\n' + stderr)
+        if returncode != 0:
+            self.stop_err('Error in VCFFiltering:\nVCFFiltering.py exited with status %d' % returncode)
+
+        # both destinations default to "", which cannot be opened or created
+        if opts.graphHTML and opts.dirGraphs:
+            self._collectGraphs()
+
+
+if __name__ == "__main__":
+    # Script entry point: read the command line, then run the wrapped tool.
+    wrapper = VCFFilteringWrapper()
+    wrapper.setAttributesFromCmdLine()
+    wrapper.run()
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/VCFFiltering_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFFiltering_wrapper.xml Tue Apr 05 08:33:41 2016 -0400
[
@@ -0,0 +1,157 @@
+<tool id="VCFFiltering" name="VCFFiltering" version="0.01">
+    <description>Filters SNP on a VCF depending on depth, allele number and allele frequency</description>
+    <requirements>
+        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
+    </requirements>
+    <version_command>
+  VCFFiltering.py --version
+    </version_command>
+    <command interpreter="python">
+ #if $DP_auto.is_DP_auto
+    VCFFiltering_wrapper.py -f $inputVCF -o $outputVCF -F $AF -N $AN
+    #else
+    VCFFiltering_wrapper.py -f $inputVCF -o $outputVCF -F $AF -N $AN -m $DP_auto.DPmin -M $DP_auto.DPmax
+ #end if
+ ## Bed files
+ #if $BedFile_List.is_BedFile
+ #for $bed in $BedFile_List.BedFiles
+  -b $bed.inputBed
+ #end for
+ #end if
+ --graphHTML $output_html --dirGraphs "$output_html.files_path"
+
+    </command>
+    <inputs>
+        <param name="inputVCF" type="data" format="vcf" label="Input VCF File name (from FreeBayes)"/>
+        <conditional name="DP_auto">
+            <param name="is_DP_auto" type="boolean" label="Calculate optimal depth range automatically" truevalue="yes" falsevalue="no" checked="on" />
+            <when value="yes"/>
+            <when value="no">
+                <param name="DPmin" type="integer" label="minumum Depth" value="1" help="default = 1">
+                    <validator type="in_range" min="0" message="DP can't be negative" />
+                </param>
+                <param name="DPmax" type="integer" label="maximum Depth" value="200" help="default = 200">
+                    <validator type="in_range" min="0" message="DP can't be negative"  />
+                </param>
+            </when>
+        </conditional>
+        <param name="AF" type="float" value="0.9" label="minimum allele frequency" help="default = 0.9">
+            <validator type="in_range" min="0.0" max="1.0"/>
+        </param>
+        <param name="AN" type="integer" value="2" label="maximum allele number" help="default = 2">
+            <validator type="in_range" min="1" message="Allele number can't be negative" />
+        </param>
+        <conditional name="BedFile_List">
+            <param name="is_BedFile" type="boolean" label="bed files : list of coordinates to filter, multiple beds allowed" truevalue="yes" falsevalue="no" checked="off" />
+            <when value="no"/>
+            <when value="yes">
+                <repeat name="BedFiles" title="bed files : list of coordinates to filter, multiple beds allowed" min="1">
+                    <param name="inputBed" type="data" format="bed" label="Select Bed file "/>
+                </repeat>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="outputVCF" label="${tool.name} on ${on_string} (vcf)"/>
+        <data format="html" name="output_html" label="${tool.name} graphs on ${on_string} (html)">
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="is_DP_auto" value="yes" />
+            <param name="AF" value="0.9"/>
+            <param name="AN" value="2"/>
+            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf" />
+            <output name="outputVCF" file="VCFFiltering_DPauto_output.vcf"/>
+        </test>
+        <test>
+            <param name="is_DP_auto" value="no" />
+            <param name="DPmin" value="4"/>
+            <param name="DPmax" value="200"/>
+            <param name="AF" value="0.9"/>
+            <param name="AN" value="2"/>
+            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf" />
+            <output name="outputVCF" file="VCFFiltering_DP_4_200_output.vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Filters SNP on a VCF depending on depth, allele number and allele frequency**
+
+-----
+
+**what it does :**
+
+VCFFiltering is a Python script that filters SNP results from freebayes on multiple criteria at once. The filters are : 
+
+ - Allele number : number of possible allele at the genomic position
+ - Allele frequency : frequency of the most represented allele ; note that if the most represented allele is the reference (a "." in the 4th column of the VCF), the allele frequency filter will still work but the allele frequency should be under 1-x
+ - Depth : Higher and lower bound of the depth ; the depth is the number of reads mapped on the genomic positions.
+
+Depth can be automatically detected. If you do so, the 90 % of the positions with a depth closest to the most frequent depth will pass the filter. 
+
+This script has been developed to be used with freebayes output, on haploid data. 
+
+
+.. class:: infomark
+
+the VCF source is detected from the header. Please keep the header of your VCF file if you want to use this tool
+
+-----
+
+**input and output formats :**
+
+input format is a VCF file obtained with freebayes ; headers are necessary
+you can also add some bed files to filter some specific regions.
+
+output format is a filtered VCF file.
+
+-----
+
+**example :**
+
+
+VCF input file: ::
+
+    ##fileformat=VCFv4.1
+    ##fileDate=20150126
+    ##source=freeBayes v0.9.13-2-ga830efd
+    ##reference=ref.fsa
+    ##phasing=none
+    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
+    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
+    chrom1 1 . T . . . DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
+    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
+    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
+    chrom1 4 . G T . . DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
+    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0
+
+proposed options: ::
+
+ - Calculate optimal depth range automatically = no
+ - minumum Depth = 5
+ - maximum Depth = 14
+ - minimum allele frequency = 0.9
+ - maximum allele number = 2
+
+example result : ::
+
+    ##fileformat=VCFv4.1
+    ##fileDate=20150126
+    ##source=freeBayes v0.9.13-2-ga830efd
+    ##reference=ref.fsa
+    ##phasing=none
+    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
+    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
+    chrom1 1 . T . . G_DP DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177;G_AN=0;G_AF=0.00;G_DP=4;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
+    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177;G_AN=0;G_AF=0.08;G_DP=12;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
+    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=1.00;G_DP=5;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
+    chrom1 4 . G T . G_AF DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=0.83;G_DP=6;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
+    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177;G_AN=0;G_AF=0.92;G_DP=12;G_Base=C GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0
+
+-----
+
+**reference :**
+
+]]>
+    </help>
+</tool>
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/VCFStorage_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFStorage_wrapper.py Tue Apr 05 08:33:41 2016 -0400
[
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+
+import subprocess, tempfile, sys, os, glob, shutil, time, random
+from optparse import OptionParser
+from optparse import Option, OptionValueError
+
+class VCFStorageWrapper(object):
+
+ def __init__(self):
+ self._options = None
+
+
+ def stop_err(self, msg ):
+ sys.stderr.write( "%s\n" % msg )
+ sys.exit()
+
+
+ def setAttributesFromCmdLine(self):
+ description = "VCFStorage_wrapper"
+ description += "\nWrapper for VCFStorage\n VCFStorage "
+ description += "VCFStorage stores info from variant calling into a table. It will create a tabulate file with the different infos\n"
+ description += "example : VCFStorage -f fasta.fa -l genomelist.list -w workdir -o output.tab \n"
+ parser = OptionParser(description = description, version = "0.2")
+ parser.add_option("-f", "--fasta",            dest = "fastaFile",      action = "store",     type = "string", help = "Input fasta file name [compulsory] [format: Fasta]",                default = "")
+ parser.add_option("-l", "--genomeNameList",   dest = "genomeNameList", action = "append",    type = "string", help = "Input list of genome name ")
+ parser.add_option("-L", "--genomeFileList",   dest = "genomeFileList", action = "append",    type = "string", help = "Input list of genome VCF file ")
+ parser.add_option("-w", "--workDir",          dest = "workDir",        action = "store",     type = "string", help = "name of the workingDirectory",                                      default = "")
+ parser.add_option("-o", "--out",              dest = "outFile",        action = "store",     type = "string", help = "Output file name [compulsory] [format: tab]",                       default = "")
+ options = parser.parse_args()[0]
+ self._setAttributesFromOptions(options)
+
+
+ def _setAttributesFromOptions(self, options):
+ self._options = options
+
+ def run(self):
+ self.createGenomeList()
+ cmd = self.createCmdLine()
+ self.launchCmdLine(cmd)
+
+ def createGenomeList(self):
+ self.genomelistFile = "%s.genomeListFile" % self._options.outFile
+ lGenomeName = self._options.genomeNameList
+ lGenomeVCF = self._options.genomeFileList
+ output = open(self.genomelistFile, "w")
+ for i,genomeName in enumerate(lGenomeName) :
+ output.write("%s\t%s\n" % (lGenomeName[i],lGenomeVCF[i]))
+ output.close()
+
+    def createCmdLine(self):
+        """Build the shell command that runs VCFStorage.py.
+
+        Reads ``fastaFile``, ``outFile`` and ``workDir`` from ``self._options``
+        and the list file path from ``self.genomelistFile``. When no working
+        directory was given with ``-w``, a name is generated from the current
+        time and a random number.
+
+        Returns:
+            The command as one string, in the form
+            ``VCFStorage.py -f <fasta> -o <out> -l <genome list> -w <workdir>``.
+            The command is also printed on stdout.
+        """
+        # Imported here so the method does not depend on the file header.
+        import random
+        import time
+
+        # Honour -w/--workDir; it was parsed but previously never used.
+        workdir = self._options.workDir
+        if not workdir:
+            workdir = "VCFStorage_%s_%d" % (time.strftime("%d%H%M%S"), random.randint(0, 10000))
+        prg = "VCFStorage.py"
+        args = " ".join([
+            "-f %s" % self._options.fastaFile,
+            "-o %s" % self._options.outFile,
+            "-l %s" % self.genomelistFile,
+            "-w %s" % workdir,
+        ])
+        cmd = "%s %s" % (prg, args)
+
+        # Parenthesised single-argument form prints the same under Python 2 and 3.
+        print(cmd)
+        return cmd
+
+    def launchCmdLine(self, cmd):
+        """Run ``cmd`` through the shell and report any failure.
+
+        The child's stderr is captured in a temporary file. The run counts as
+        failed when anything was written to stderr or when the exit status is
+        non-zero; the failure is then reported through ``self.stop_err``.
+        The genome list file (``self.genomelistFile``) is removed in every
+        case.
+
+        Args:
+            cmd: the command line, as a single string.
+        """
+        try:
+            try:
+                # An anonymous temporary file copes with very large stderr
+                # output and is deleted as soon as it is closed.
+                with tempfile.TemporaryFile() as tmp_stderr:
+                    proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr )
+                    returncode = proc.wait()
+                    tmp_stderr.seek(0)
+                    stderr = tmp_stderr.read()
+                # The file is binary: bytes under Python 3, str under Python 2.
+                if not isinstance(stderr, str):
+                    stderr = stderr.decode("utf-8", "replace")
+                if stderr:
+                    raise Exception(stderr)
+                # A failing run that printed nothing must not pass as success.
+                if returncode != 0:
+                    raise Exception("VCFStorage.py exited with status %d" % returncode)
+            except Exception as e:
+                self.stop_err( 'Error in VCFStorage:\n' + str( e ) )
+        finally:
+            # Single cleanup point, reached on success, failure and exit.
+            os.remove(self.genomelistFile)
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line, then run the wrapped tool.
+    wrapper = VCFStorageWrapper()
+    wrapper.setAttributesFromCmdLine()
+    wrapper.run()
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/VCFStorage_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFStorage_wrapper.xml Tue Apr 05 08:33:41 2016 -0400
[
b'@@ -0,0 +1,236 @@\n+<tool id="VCFStorage" name="VCFStorage" version="0.01">\n+  <description> stores info from variant calling into a table. It will create a tabulate filed with SNP infos</description>\n+  <requirements>\n+    <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>\n+  </requirements>\n+  <version_command>\n+  VCFStorage.py --version\n+  </version_command>\n+  <command interpreter="python">\n+    VCFStorage_wrapper.py -f $inputFasta -o $outputVCFStorage \n+    ## genome list\n+    #for $VCF in $VCFFile\n+     -l $VCF.strainName\n+     -L $VCF.inputStrainVCF\n+    #end for\n+  </command>\n+  <inputs>\n+    <param name="inputFasta" type="data" format="fasta" label="Input genome sequence file name (fasta)"/>\n+    <repeat name="VCFFile" title="VCF list" min="1">\n+        <param name="strainName" size="20" type="text" value="V1" label="strain name (no space allowed)"/>\n+        <param name="inputStrainVCF" type="data" format="vcf" label="Select VCF file "/>\n+    </repeat>\n+  </inputs>\n+  <outputs>\n+    <data format="tabular" name="outputVCFStorage" label="${tool.name} on ${on_string} (tabular)"/>\n+  </outputs>\n+  <tests>\n+    <test>\n+        <param name="inputFasta" ftype="fasta" value="chr17.fa" />\n+        <param name="strainName" value="V1"/>\n+        <param name="inputStrainVCF" ftype="vcf" value="chr17.VCF"/>\n+        <output name="outputVCFStorage" ftype="tabular" file="Expchr17.tab"/>\n+    </test>\n+  </tests>\n+  <help><![CDATA[\n+    **stores info from variant calling into a table. It will create a tabulate filed with SNP infos**\n+    \n+-----\n+\n+**what it does :**\n+\n+VCFStorage.py is a python script that allows to store data from multiple VCF into a single tabular marker file. each VCF will be a new column on the final output.\n+\n+-----\n+\n+**input format :**\n+\n+Multiple files are necessary as input : \n+\n+ - the fasta file of your genomic sequence\n+ - multiple VCF files (1 per strain). 
It is strongly advised to use the column filter (col 7) for filtered positions instead of removing the lines from the VCF. \n+\n+-----\n+\n+**ouput format :**\n+\n+the result is a tab delimited format file  where all genomic positions are in rows, and all strains are in columns (in the order you gave the VCF)\n+\n+For each position and each genome, a code is attributed : \n+\n+- for the reference : ::\n+\n+    A,T,G,C for the corresponding nucleotidic acid \n+\n+- for the genomes : ::\n+\n+    U if the position was not refered in the VCF file \n+    R if the base is similar to the reference \n+    F if the base has been filtered in the column FILTER (column 7) of the VCF \n+    A,T,G,C if the genome has a validated SNP at the position\n+\n+\n+-----\n+\n+**example :**\n+\n+fasta input file (genomic sequence): ::\n+    \n+    >chr_17\n+    ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa\n+    TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT\n+    \n+VCF input file : ::\n+\n+    ##fileformat=VCFv4.1\n+    ##fileDate=20140725\n+    ##source=freeBayes v0.9.13-2-ga830efd\n+    ##reference=exmple.fsa\n+    ##phasing=none\n+    ##DetectedFormat=freebayes\n+    ##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">\n+    ##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">\n+    ##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)">\n+    ##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">\n+    ##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">\n+    #CHROM    POS    ID    REF    ALT    QUAL    FILTER    INFO    FORMAT    V1\n+    chr_17    17    .    
A    G    529.213    G_AF;G_DP    AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;tech'..b'=T    GT:DP:RO:QR:AO:QA:GL    1/1:37:0:0:37:1336:-10,-10,0\n+    chr_17    112    .    G    A    1276.25    G_DP    AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A    GT:DP:RO:QR:AO:QA:GL    1/1:40:0:0:40:1471:-10,-10,0\n+\n+expected result : ::\n+\n+    CHROM    POS    reference    V1\n+    chr_17    1    C    U\n+    chr_17    2    C    U\n+    chr_17    3    C    U\n+    chr_17    4    T    U\n+    chr_17    5    A    U\n+    chr_17    6    A    U\n+    chr_17    7    C    U\n+    chr_17    8    C    U\n+    chr_17    9    C    U\n+    chr_17    10    T    U\n+    chr_17    11    A    U\n+    chr_17    12    A    U\n+    chr_17    13    C    U\n+    chr_17    14    C    U\n+    chr_17    15    C    U\n+    chr_17    16    T    U\n+    chr_17    17    A    F\n+    chr_17    18    A    U\n+    chr_17    19    C    U\n+    chr_17    20    C    U\n+    chr_17    21    C    U\n+    chr_17    22    T    U\n+    chr_17    23    A    U\n+    chr_17    24    A    U\n+    chr_17    25    C    U\n+    chr_17    26    C    U\n+    chr_17    27    C    U\n+    chr_17    28    T    U\n+    chr_17    29    A    U\n+    chr_17    30    A    U\n+    chr_17    31    C    U\n+    chr_17    32    C    U\n+    chr_17    33    C    U\n+    chr_17    34    T    U\n+    chr_17    35    A    U\n+    chr_17    36    A    U\n+    chr_17    37    C    
G\n+    chr_17    38    C    U\n+    chr_17    39    C    U\n+    chr_17    40    T    F\n+    chr_17    41    A    U\n+    chr_17    42    A    U\n+    chr_17    43    C    U\n+    chr_17    44    C    U\n+    chr_17    45    C    U\n+    chr_17    46    T    U\n+    chr_17    47    A    U\n+    chr_17    48    A    U\n+    chr_17    49    C    U\n+    chr_17    50    C    U\n+    chr_17    51    C    U\n+    chr_17    52    T    U\n+    chr_17    53    A    U\n+    chr_17    54    A    U\n+    chr_17    55    C    U\n+    chr_17    56    C    U\n+    chr_17    57    C    U\n+    chr_17    58    T    U\n+    chr_17    59    A    U\n+    chr_17    60    A    R\n+    chr_17    61    T    U\n+    chr_17    62    A    U\n+    chr_17    63    C    U\n+    chr_17    64    G    U\n+    chr_17    65    C    U\n+    chr_17    66    G    U\n+    chr_17    67    C    U\n+    chr_17    68    G    U\n+    chr_17    69    C    U\n+    chr_17    70    G    U\n+    chr_17    71    C    U\n+    chr_17    72    C    U\n+    chr_17    73    T    R\n+    chr_17    74    A    U\n+    chr_17    75    A    U\n+    chr_17    76    C    U\n+    chr_17    77    C    U\n+    chr_17    78    C    U\n+    chr_17    79    T    U\n+    chr_17    80    A    U\n+    chr_17    81    C    T\n+    chr_17    82    G    U\n+    chr_17    83    A    U\n+    chr_17    84    C    U\n+    chr_17    85    T    U\n+    chr_17    86    T    U\n+    chr_17    87    T    U\n+    chr_17    88    A    U\n+    chr_17    89    A    U\n+    chr_17    90    C    U\n+    chr_17    91    C    U\n+    chr_17    92    T    U\n+    chr_17    93    A    U\n+    chr_17    94    C    U\n+    chr_17    95    T    U\n+    chr_17    96    C    U\n+    chr_17    97    T    U\n+    chr_17    98    A    U\n+    chr_17    99    A    U\n+    chr_17    100    A    U\n+    chr_17    101    C    U\n+    chr_17    102    T    U\n+    chr_17    103    C    U\n+    chr_17    104    T    U\n+    chr_17    105    C    F\n+    chr_17    106 
   C    U\n+    chr_17    107    T    U\n+    chr_17    108    A    U\n+    chr_17    109    C    U\n+    chr_17    110    T    U\n+    chr_17    111    A    U\n+    chr_17    112    G    F\n+    chr_17    113    T    U\n+    chr_17    114    A    U\n+    chr_17    115    C    U\n+    chr_17    116    G    U\n+    chr_17    117    T    U\n+    chr_17    118    C    U\n+    chr_17    119    T    U\n+    chr_17    120    T    U\n+\n+-----\n+\n+**reference :**\n+\n+]]>\n+  </help>\n+</tool>\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/Expchr17.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/Expchr17.tab Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,121 @@
+CHROM POS reference V1
+chr_17 1 C U
+chr_17 2 C U
+chr_17 3 C U
+chr_17 4 T U
+chr_17 5 A U
+chr_17 6 A U
+chr_17 7 C U
+chr_17 8 C U
+chr_17 9 C U
+chr_17 10 T U
+chr_17 11 A U
+chr_17 12 A U
+chr_17 13 C U
+chr_17 14 C U
+chr_17 15 C U
+chr_17 16 T U
+chr_17 17 A F
+chr_17 18 A U
+chr_17 19 C U
+chr_17 20 C U
+chr_17 21 C U
+chr_17 22 T U
+chr_17 23 A U
+chr_17 24 A U
+chr_17 25 C U
+chr_17 26 C U
+chr_17 27 C U
+chr_17 28 T U
+chr_17 29 A U
+chr_17 30 A U
+chr_17 31 C U
+chr_17 32 C U
+chr_17 33 C U
+chr_17 34 T U
+chr_17 35 A U
+chr_17 36 A U
+chr_17 37 C G
+chr_17 38 C U
+chr_17 39 C U
+chr_17 40 T F
+chr_17 41 A U
+chr_17 42 A U
+chr_17 43 C U
+chr_17 44 C U
+chr_17 45 C U
+chr_17 46 T U
+chr_17 47 A U
+chr_17 48 A U
+chr_17 49 C U
+chr_17 50 C U
+chr_17 51 C U
+chr_17 52 T U
+chr_17 53 A U
+chr_17 54 A U
+chr_17 55 C U
+chr_17 56 C U
+chr_17 57 C U
+chr_17 58 T U
+chr_17 59 A U
+chr_17 60 A R
+chr_17 61 T U
+chr_17 62 A U
+chr_17 63 C U
+chr_17 64 G U
+chr_17 65 C U
+chr_17 66 G U
+chr_17 67 C U
+chr_17 68 G U
+chr_17 69 C U
+chr_17 70 G U
+chr_17 71 C U
+chr_17 72 C U
+chr_17 73 T R
+chr_17 74 A U
+chr_17 75 A U
+chr_17 76 C U
+chr_17 77 C U
+chr_17 78 C U
+chr_17 79 T U
+chr_17 80 A U
+chr_17 81 C T
+chr_17 82 G U
+chr_17 83 A U
+chr_17 84 C U
+chr_17 85 T U
+chr_17 86 T U
+chr_17 87 T U
+chr_17 88 A U
+chr_17 89 A U
+chr_17 90 C U
+chr_17 91 C U
+chr_17 92 T U
+chr_17 93 A U
+chr_17 94 C U
+chr_17 95 T U
+chr_17 96 C U
+chr_17 97 T U
+chr_17 98 A U
+chr_17 99 A U
+chr_17 100 A U
+chr_17 101 C U
+chr_17 102 T U
+chr_17 103 C U
+chr_17 104 T U
+chr_17 105 C F
+chr_17 106 C U
+chr_17 107 T U
+chr_17 108 A U
+chr_17 109 C U
+chr_17 110 T U
+chr_17 111 A U
+chr_17 112 G F
+chr_17 113 T U
+chr_17 114 A U
+chr_17 115 C U
+chr_17 116 G U
+chr_17 117 T U
+chr_17 118 C U
+chr_17 119 T U
+chr_17 120 T U
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFCarto_input.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFCarto_input.tab Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,76 @@
+CHROM POS reference REF1 G01 REF2 G02 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12
+Chr1 1 A R R R R U R R R R R R R R R
+Chr1 2 T R R R R R U R R R R R R R R
+Chr1 3 G R R R R R R R R R R R R R R
+Chr1 4 G R R R R R R R R R R R R F R
+Chr1 5 G R R R R R R U F R R R R R R
+Chr1 6 C R R R R R R R R R R R R R U
+Chr1 7 A G C C C F C C C C C G C G G
+Chr1 8 G R R R R R R R R R R R R R R
+Chr1 9 C R T T R T T T U R T R T T T
+Chr1 10 T R R R R R R R R R R R R R U
+Chr1 11 T R R R R R R R R R R R F R R
+Chr1 12 A R R R R U R R R R F R R R R
+Chr1 13 A R R G G R F R F G R G R R F
+Chr1 14 A R R R R R R R R F R R R R R
+Chr1 15 G R R R U R F R R R R R R U U
+Chr1 16 G A R R A R R U F R R A A R A
+Chr1 17 A R G G R U R R G G R G U R G
+Chr1 18 C R R R R R U R R R R R R R R
+Chr1 19 G C U R C R C U R R C C C R C
+Chr1 20 G A U R A R A U R R A A A R A
+Chr1 21 G T U R T R T U R R T T T R T
+Chr1 22 A T U R T R T U R R T T T R T
+Chr1 23 C T T R T R R R T R U T R T T
+Chr1 24 T R R R R R U R R R R R R R F
+Chr1 25 G R F R R R R R U R F R R R R
+Chr1 26 T R R C C C C C R R C R C R U
+Chr1 27 C R R G G G G R G R G R G R R
+Chr1 28 C G T T T G G T T F T G T T G
+Chr1 29 G T R R R R T R T R T T R T R
+Chr1 30 T R R R R R R R R R R R R R R
+Chr1 31 A R R R R F R R R R F R R R R
+Chr1 32 A G G R G G G R R G G G G G R
+Chr1 33 G R R R R R R R R R R R R R R
+Chr1 34 C R R R R R R R R R R R R R R
+Chr1 35 C R R R R R F R R R R R R R U
+Chr2 1 T R R R F R R R R R R R R R R
+Chr2 2 A C R R C C U R R R R C C C U
+Chr2 3 C R R R R R R U R R R R R R R
+Chr2 4 C R R R R R R R U R R R R F R
+Chr2 5 T R R R R R R R R R R R R R R
+Chr2 6 C R R R R R R R R R R R R R R
+Chr2 7 A T F R U R T T T R T T F T T
+Chr2 8 T R R R R R R R R R R R R R R
+Chr2 9 C R R R R R R R R R R R R R R
+Chr2 10 G R T T T T R T R R R R R U R
+Chr2 11 C R A A A A R A R R R R R U R
+Chr2 12 A R T T T T R T R R R R R U R
+Chr2 13 T R C C C C R C R R R R R U R
+Chr2 14 C T A A T A T A T A T T A A A
+Chr2 15 T R R R F R R R R R R R R R R
+Chr2 16 A R R R R R R R U R R R R R R
+Chr2 17 A R U R R R R R R R R R R R F
+Chr2 18 G R R R R R R R R R R R R R R
+Chr2 19 A R R R R R R F R R R R R R R
+Chr2 20 C R R R R R R R F R R R R R R
+Chr2 21 G A R R A A A R R R A A R R R
+Chr2 22 A R R R R R R F R R R R R R R
+Chr2 23 A R R T T R R T T T T T R R R
+Chr2 24 T R R R R R R U R R R R R R F
+Chr2 25 T R A A R R A R A R R A R R A
+Chr2 26 G R R R R R R R R R R R R R R
+Chr2 27 A R R R R R R R R R R R R U R
+Chr2 28 C R U R R F F R R F R F U R R
+Chr2 29 G R R R R R R F R R R R R R R
+Chr2 30 T A A G A G G A A G F G G G U
+Chr2 31 A R R R R R R R R U U R R R R
+Chr2 32 G R R R R R R U U R R R R R R
+Chr2 33 G R U R R R R U R R R R R R R
+Chr2 34 A R R R U R R R R R R R R R R
+Chr2 35 G R R R R R R R R R R R R R R
+Chr2 36 T R R R R R R U R R R R R R R
+Chr3 1 T U R R R R R U R R R R R R R
+Chr3 2 T R R U R R R U R R R R R R R
+Chr3 3 T F R R R R R U R R R R R R R
+Chr3 4 T R R F R R R U R R R R R R R
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFCarto_output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFCarto_output.tab Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,27 @@
+CHROM POS reference REF1 G01 REF2 G02 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12
+Chr1 7 A G C C C F C C C C C G C G G
+Chr1 9 C R T T R T T T U R T R T T T
+Chr1 13 A R R G G R F R F G R G R R F
+Chr1 16 G A R R A R R U F R R A A R A
+Chr1 17 A R G G R U R R G G R G U R G
+Chr1 19 G C U R C R C U R R C C C R C
+Chr1 20 G A U R A R A U R R A A A R A
+Chr1 21 G T U R T R T U R R T T T R T
+Chr1 22 A T U R T R T U R R T T T R T
+Chr1 23 C T T R T R R R T R U T R T T
+Chr1 26 T R R C C C C C R R C R C R U
+Chr1 27 C R R G G G G R G R G R G R R
+Chr1 28 C G T T T G G T T F T G T T G
+Chr1 29 G T R R R R T R T R T T R T R
+Chr1 32 A G G R G G G R R G G G G G R
+Chr2 2 A C R R C C U R R R R C C C U
+Chr2 7 A T F R U R T T T R T T F T T
+Chr2 10 G R T T T T R T R R R R R U R
+Chr2 11 C R A A A A R A R R R R R U R
+Chr2 12 A R T T T T R T R R R R R U R
+Chr2 13 T R C C C C R C R R R R R U R
+Chr2 14 C T A A T A T A T A T T A A A
+Chr2 21 G A R R A A A R R R A A R R R
+Chr2 23 A R R T T R R T T T T T R R R
+Chr2 25 T R A A R R A R A R R A R R A
+Chr2 30 T A A G A G G A A G F G G G U
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFCarto_output_merged.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFCarto_output_merged.bed Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,20 @@
+Chr1 7 7 *M_00001
+Chr1 9 9 *M_00002
+Chr1 13 13 *M_00003
+Chr1 16 16 *M_00004
+Chr1 17 17 *M_00005
+Chr1 19 22 *M_00006
+Chr1 23 23 *M_00007
+Chr1 26 26 *M_00008
+Chr1 27 27 *M_00009
+Chr1 28 28 *M_00010
+Chr1 29 29 *M_00011
+Chr1 32 32 *M_00012
+Chr2 2 2 *M_00013
+Chr2 7 7 *M_00014
+Chr2 10 13 *M_00015
+Chr2 14 14 *M_00016
+Chr2 21 21 *M_00017
+Chr2 23 23 *M_00018
+Chr2 25 25 *M_00019
+Chr2 30 30 *M_00020
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFCarto_output_merged.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFCarto_output_merged.tab Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,21 @@
+CHROM POS reference REF1 G01 REF2 G02 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12
+Chr1 *M_00001 - A H H H - H H H H H A H A A
+Chr1 *M_00002 - A H H A H H H - A H A H H H
+Chr1 *M_00003 - A A H H A - A - H A H A A -
+Chr1 *M_00004 - A H H A H H - - H H A A H A
+Chr1 *M_00005 - A H H A - A A H H A H - A H
+Chr1 *M_00006 - A - H A H A - H H A A A H A
+Chr1 *M_00007 - A A H A H H H A H - A H A A
+Chr1 *M_00008 - A A H H H H H A A H A H A -
+Chr1 *M_00009 - A A H H H H A H A H A H A A
+Chr1 *M_00010 - A H H H A A H H - H A H H A
+Chr1 *M_00011 - A H H H H A H A H A A H A H
+Chr1 *M_00012 - A A H A A A H H A A A A A H
+Chr2 *M_00013 - A H H A A - H H H H A A A -
+Chr2 *M_00014 - A - H - H A A A H A A - A A
+Chr2 *M_00015 - A H H H H A H A A A A A - A
+Chr2 *M_00016 - A H H A H A H A H A A H H H
+Chr2 *M_00017 - A H H A A A H H H A A H H H
+Chr2 *M_00018 - A A H H A A H H H H H A A A
+Chr2 *M_00019 - A H H A A H A H A A H A A H
+Chr2 *M_00020 - A A H A H H A A H - H H H -
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFFiltering_DP_4_200_output.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFFiltering_DP_4_200_output.vcf Tue Apr 05 08:33:41 2016 -0400
b
b'@@ -0,0 +1,110 @@\n+##fileformat=VCFv4.1\n+##fileDate=20140725\n+##source=freeBayes v0.9.13-2-ga830efd\n+##phasing=none\n+##DetectedFormat=freebayes\n+##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">\n+##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">\n+##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(4 - 200)">\n+##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">\n+##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">\n+##INFO=<ID=G_AN,Number=1,Type=Integer,Description="Total number of alleles calculated by VCFFiltering">\n+##INFO=<ID=G_AF,Number=1,Type=Float,Description="frequency of the most supported alternative allele calculated by VCFFiltering">\n+##INFO=<ID=G_base,Number=1,Type=String,Description="base of the most supported alternative allele found by VCFFiltering">\n+##INFO=<ID=G_DP,Number=1,Type=Integer,Description="total depth calculated by VCFFiltering">\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\n+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\n+##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">\n+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">\n+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation 
count, with partial observations recorded fractionally">\n+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\n+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\n+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\n+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\n+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\n+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">\n+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\n+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\n+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\n+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the 
deviation between ABR and ABA given E(ABR/ABA) ~ 0'..b'=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.834078;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=35;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=G\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:35:2:76:-5.08162,0,-2.36476\n+chr_1\t4678\t.\tG\tA\t21.951\tG_AF;G_DP\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.136458;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=41;RO=1;RPP=3.0103;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t0/1:3:1:41:2:68:-4.94794,0,-2.78626\n+chr_1\t4686\t.\tC\tT\t81.6957\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=3;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.73412;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=37.6667;MQMR=0;NS=1;NUMALT=1;ODDS=7.23731;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=110;QR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAF=2;SAP=3.73412;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:0:0:3:110:-9.44717,-0.842096,0\n+chr_1\t4699\t.\tG\tA\t103.379\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=4;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=36.75;MQMR=0;NS=1;NUMALT=1;ODDS=8.55324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=140;QR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAF=3;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=4;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:4:0:0:4:140:-10,-1.11256,0\n+chr_1\t4717\t.\tC\tT\t18.6085\tG_AF;G_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=34;NS=1;NUMALT=1;ODDS=1.12012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=40;RO=1;RPP=7.35
324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:40:2:66:-4.87583,0,-2.24054\n+chr_1\t4727\t.\tG\tA\t43.4619\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=32;MQMR=0;NS=1;NUMALT=1;ODDS=5.91984;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:66:-5.54913,-0.570955,0\n+chr_1\t4747\t.\tC\tT\t40.3421\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=7.35324;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=34;MQMR=0;NS=1;NUMALT=1;ODDS=5.92045;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=0;RO=0;RPP=7.35324;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:68:-5.83919,-0.571218,0\n+chr_1\t3305168\t.\tT\tA,G\t280.615\tG_AF\tAB=0.75,0.25;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n+chr_1\t3305169\t.\tT\tA,G\t280.615\tG_AF\tAB=0.25,0.75;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;
technology.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFFiltering_DPauto_output.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFFiltering_DPauto_output.vcf Tue Apr 05 08:33:41 2016 -0400
b
b'@@ -0,0 +1,110 @@\n+##fileformat=VCFv4.1\n+##fileDate=20140725\n+##source=freeBayes v0.9.13-2-ga830efd\n+##phasing=none\n+##DetectedFormat=freebayes\n+##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">\n+##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">\n+##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(3 - 5)">\n+##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">\n+##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">\n+##INFO=<ID=G_AN,Number=1,Type=Integer,Description="Total number of alleles calculated by VCFFiltering">\n+##INFO=<ID=G_AF,Number=1,Type=Float,Description="frequency of the most supported alternative allele calculated by VCFFiltering">\n+##INFO=<ID=G_base,Number=1,Type=String,Description="base of the most supported alternative allele found by VCFFiltering">\n+##INFO=<ID=G_DP,Number=1,Type=Integer,Description="total depth calculated by VCFFiltering">\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\n+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\n+##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">\n+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">\n+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, 
with partial observations recorded fractionally">\n+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\n+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\n+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\n+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\n+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\n+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">\n+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\n+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\n+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\n+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation 
between ABR and ABA given E(ABR/ABA) ~ 0.5'..b'DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.834078;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=35;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=G\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:35:2:76:-5.08162,0,-2.36476\n+chr_1\t4678\t.\tG\tA\t21.951\tG_AF\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.136458;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=41;RO=1;RPP=3.0103;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t0/1:3:1:41:2:68:-4.94794,0,-2.78626\n+chr_1\t4686\t.\tC\tT\t81.6957\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=3;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.73412;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=37.6667;MQMR=0;NS=1;NUMALT=1;ODDS=7.23731;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=110;QR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAF=2;SAP=3.73412;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:0:0:3:110:-9.44717,-0.842096,0\n+chr_1\t4699\t.\tG\tA\t103.379\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=4;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=36.75;MQMR=0;NS=1;NUMALT=1;ODDS=8.55324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=140;QR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAF=3;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=4;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:4:0:0:4:140:-10,-1.11256,0\n+chr_1\t4717\t.\tC\tT\t18.6085\tG_AF\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=34;NS=1;NUMALT=1;ODDS=1.12012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=40;RO=1;RPP=7.35324;RPPR=5.18177;R
UN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:40:2:66:-4.87583,0,-2.24054\n+chr_1\t4727\t.\tG\tA\t43.4619\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=32;MQMR=0;NS=1;NUMALT=1;ODDS=5.91984;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:66:-5.54913,-0.570955,0\n+chr_1\t4747\t.\tC\tT\t40.3421\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=7.35324;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=34;MQMR=0;NS=1;NUMALT=1;ODDS=5.92045;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=0;RO=0;RPP=7.35324;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:68:-5.83919,-0.571218,0\n+chr_1\t3305168\t.\tT\tA,G\t280.615\tG_AF;G_DP\tAB=0.75,0.25;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n+chr_1\t3305169\t.\tT\tA,G\t280.615\tG_AF;G_DP\tAB=0.25,0.75;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technolo
gy.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/VCFFiltering_input.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/VCFFiltering_input.vcf Tue Apr 05 08:33:41 2016 -0400
b
b'@@ -0,0 +1,100 @@\n+##fileformat=VCFv4.1\n+##fileDate=20140725\n+##source=freeBayes v0.9.13-2-ga830efd\n+##phasing=none\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\n+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\n+##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">\n+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">\n+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">\n+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\n+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\n+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\n+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\n+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\n+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">\n+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\n+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward 
strand">\n+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\n+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">\n+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between 
EL and ER given E(EL/ER) ~ 0.5, derived using '..b';MQM=41.5;MQMR=0;NS=1;NUMALT=1;ODDS=5.92133;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=69;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:69:-6.40152,-0.5716,0\n+chr_1\t4661\t.\tA\tG\t19.7502\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.834078;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=35;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:35:2:76:-5.08162,0,-2.36476\n+chr_1\t4678\t.\tG\tA\t21.951\t.\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.136458;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=41;RO=1;RPP=3.0103;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t0/1:3:1:41:2:68:-4.94794,0,-2.78626\n+chr_1\t4686\t.\tC\tT\t81.6957\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=3;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.73412;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=37.6667;MQMR=0;NS=1;NUMALT=1;ODDS=7.23731;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=110;QR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAF=2;SAP=3.73412;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:0:0:3:110:-9.44717,-0.842096,0\n+chr_1\t4699\t.\tG\tA\t103.379\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=4;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=36.75;MQMR=0;NS=1;NUMALT=1;ODDS=8.55324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=140;QR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAF=3;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:4:0:0:4:140:-10,-1.11256,0\n+chr_1\t4717\t.\tC\tT\t18.6085\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2
;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=34;NS=1;NUMALT=1;ODDS=1.12012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=40;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:40:2:66:-4.87583,0,-2.24054\n+chr_1\t4727\t.\tG\tA\t43.4619\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=32;MQMR=0;NS=1;NUMALT=1;ODDS=5.91984;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:66:-5.54913,-0.570955,0\n+chr_1\t4747\t.\tC\tT\t40.3421\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=7.35324;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=34;MQMR=0;NS=1;NUMALT=1;ODDS=5.92045;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=0;RO=0;RPP=7.35324;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:68:-5.83919,-0.571218,0\n+chr_1\t3305168\t.\tT\tA,G\t280.615\t.\tAB=0.75,0.25;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n+chr_1\t3305169\t.\tT\tA,G\t280.615\t.\tAB=0.25,0.75;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412
;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/chr17.VCF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/chr17.VCF Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,21 @@
+##fileformat=VCFv4.1
+##fileDate=20140725
+##source=freeBayes v0.9.13-2-ga830efd
+##reference=exmple.fsa
+##phasing=none
+##DetectedFormat=freebayes
+##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">
+##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">
+##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)">
+##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">
+##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT V1
+chr_17 17 . A G 529.213 G_AF;G_DP AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.58;G_DP=36;G_Base=G GT:DP:RO:QR:AO:QA:GL 0/1:36:15:535:21:751:-10,0,-10
+chr_17 37 . C G 1082.38 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=34;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=3.26577;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=48.0391;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1243;QR=0;RO=0;RPP=15.5282;RPPR=0;RUN=1;SAF=18;SAP=3.26577;SAR=16;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=34;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:34:0:0:34:1243:-10,-9.23017,0
+chr_17 40 . T T 825.518 G_AF AB=0;ABP=0;AC=2;AF=1;AN=2;AO=29;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=6.67934;EPPR=13.8677;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=8.92992;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1082;QR=178;RO=5;RPP=9.07545;RPPR=13.8677;RUN=1;SAF=13;SAP=3.68421;SAR=16;SRF=5;SRP=13.8677;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.85;G_DP=34;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:34:5:178:29:1082:-10,0,-6.82575
+chr_17 60 . A . 699.741 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=22;CIGAR=1X;DP=22;DPB=22;DPRA=0;EPP=17.2236;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=32.2544;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=823;QR=0;RO=0;RPP=9.32731;RPPR=0;RUN=1;SAF=12;SAP=3.40511;SAR=10;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=22;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:22:0:0:22:823:-10,-5.98732,0
+chr_17 73 . T . 846.299 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=27;CIGAR=1X;DP=27;DPB=27;DPRA=0;EPP=16.6021;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=38.84;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1002;QR=0;RO=0;RPP=5.02092;RPPR=0;RUN=1;SAF=21;SAP=21.1059;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=27;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:27:0:0:27:1002:-10,-7.34226,0
+chr_17 81 . C T 764.464 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=25;CIGAR=1X;DP=25;DPB=25;DPRA=0;EPP=13.5202;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=36.1324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=902;QR=0;RO=0;RPP=3.79203;RPPR=0;RUN=1;SAF=19;SAP=17.6895;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=25;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:25:0:0:25:902:-10,-6.76842,0
+chr_17 105 . C T 1154 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=37;CIGAR=1X;DP=37;DPB=37;DPRA=0;EPP=5.88603;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=52.0047;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1336;QR=0;RO=0;RPP=19.9713;RPPR=0;RUN=1;SAF=23;SAP=7.76406;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=37;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:37:0:0:37:1336:-10,-10,0
+chr_17 112 . G A 1276.25 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A GT:DP:RO:QR:AO:QA:GL 1/1:40:0:0:40:1471:-10,-10,0
+
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/test-data/chr17.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/test-data/chr17.fa Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,3 @@
+>chr_17
+ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa
+TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFGandalfTools/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/tool_dependencies.xml Tue Apr 05 08:33:41 2016 -0400
b
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+ <package name="VCF_Gandalf_Tools" version="1.0">
+ <install version="1.0">
+ <actions>
+ <action type="download_by_url">https://urgi.versailles.inra.fr/download/gandalf/VCFtools-1.2.tar.gz</action>
+ <action type="shell_command">python setup_VCFtools.py install</action>
+ <action type="move_directory_files">
+ <source_directory>.</source_directory>
+ <destination_directory>$INSTALL_DIR</destination_directory>
+ </action>
+ <action type="set_environment">
+ <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR</environment_variable>
+ <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+ </action>
+ </actions>
+ </install>
+ </package>
+</tool_dependency>
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFStorage_wrapper.py
--- a/VCFStorage_wrapper.py Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-
-
-import subprocess, tempfile, sys, os, glob, shutil, time, random
-from optparse import OptionParser
-from optparse import Option, OptionValueError
-
-class VCFStorageWrapper(object):
-
- def __init__(self):
- self._options = None
-
-
- def stop_err(self, msg ):
- sys.stderr.write( "%s\n" % msg )
- sys.exit()
-
-
- def setAttributesFromCmdLine(self):
- description = "VCFStorage_wrapper"
- description += "\nWrapper for VCFStorage\n VCFStorage "
- description += "VCFStorage stores info from variant calling into a table. It will create a tabulate file with the different infos\n"
- description += "example : VCFStorage -f fasta.fa -l genomelist.list -w workdir -o output.tab \n"
- parser = OptionParser(description = description, version = "0.1")
- parser.add_option("-f", "--fasta",            dest = "fastaFile",      action = "store",     type = "string", help = "Input fasta file name [compulsory] [format: Fasta]",                default = "")
- parser.add_option("-l", "--genomeNameList",   dest = "genomeNameList", action = "append",    type = "string", help = "Input list of genome name ")
- parser.add_option("-L", "--genomeFileList",   dest = "genomeFileList", action = "append",    type = "string", help = "Input list of genome VCF file ")
- parser.add_option("-w", "--workDir",          dest = "workDir",        action = "store",     type = "string", help = "name of the workingDirectory",                                      default = "")
- parser.add_option("-o", "--out",              dest = "outFile",        action = "store",     type = "string", help = "Output file name [compulsory] [format: tab]",                       default = "")
- options = parser.parse_args()[0]
- self._setAttributesFromOptions(options)
-
-
- def _setAttributesFromOptions(self, options):
- self._options = options
-
- def run(self):
- self.createGenomeList()
- cmd = self.createCmdLine()
- self.launchCmdLine(cmd)
-
- def createGenomeList(self):
- self.genomelistFile = "%s.genomeListFile" % self._options.outFile
- lGenomeName = self._options.genomeNameList
- lGenomeVCF = self._options.genomeFileList
- output = open(self.genomelistFile, "w")
- for i,genomeName in enumerate(lGenomeName) :
- output.write("%s\t%s\n" % (lGenomeName[i],lGenomeVCF[i]))
- output.close()
-
- def createCmdLine(self):
- workdir = "VCFStorage_%s_%d" % (time.strftime("%d%H%M%S"), random.randint(0, 10000))
- prg = "VCFStorage.py"
- args = ""
- args += "-f %s" % self._options.fastaFile
- args += " "
- args += "-o %s" % self._options.outFile
- args += " "
- args += "-l %s" % self.genomelistFile
- args += " "
- args += "-w %s" % workdir
- cmd = "%s %s" %(prg, args)
-
- print cmd
- return cmd
-
- def launchCmdLine(self, cmd):
- try:
- tmp_err = tempfile.NamedTemporaryFile().name
- tmp_stderr = open( tmp_err, 'wb' )
- proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr )
- returncode = proc.wait()
- tmp_stderr.close()
- # get stderr, allowing for case where it's very large
- tmp_stderr = open( tmp_err, 'rb' )
- stderr = ''
- buffsize = 1048576
- try:
- while True:
- stderr += tmp_stderr.read( buffsize )
- if not stderr or len( stderr ) % buffsize != 0:
- break
- except OverflowError:
- pass
- tmp_stderr.close()
- if stderr:
- raise Exception, stderr
- except Exception, e:
- os.remove(self.genomelistFile)
- self.stop_err( 'Error in VCFStorage:\n' + str( e ) ) 
- os.remove(self.genomelistFile)
-
-if __name__ == "__main__":
- iWrapper = VCFStorageWrapper()
- iWrapper.setAttributesFromCmdLine()
- iWrapper.run()
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d VCFStorage_wrapper.xml
--- a/VCFStorage_wrapper.xml Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,236 +0,0 @@\n-<tool id="VCFStorage" name="VCFStorage" version="0.01">\n-  <description> stores info from variant calling into a table. It will create a tabulate filed with SNP infos</description>\n-  <requirements>\n-    <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>\n-  </requirements>\n-  <version_command>\n-  VCFStorage.py --version\n-  </version_command>\n-  <command interpreter="python">\n-    VCFStorage_wrapper.py -f $inputFasta -o $outputVCFStorage \n-    ## genome list\n-    #for $VCF in $VCFFile\n-     -l $VCF.strainName\n-     -L $VCF.inputStrainVCF\n-    #end for\n-  </command>\n-  <inputs>\n-    <param name="inputFasta" type="data" format="fasta" label="Input genome sequence file name (fasta)"/>\n-    <repeat name="VCFFile" title="VCF list" min="1">\n-        <param name="strainName" size="20" type="text" value="V1" label="strain name (no space allowed)"/>\n-        <param name="inputStrainVCF" type="data" format="vcf" label="Select VCF file "/>\n-    </repeat>\n-  </inputs>\n-  <outputs>\n-    <data format="tabular" name="outputVCFStorage" label="${tool.name} on ${on_string} (tabular)"/>\n-  </outputs>\n-  <tests>\n-    <test>\n-        <param name="inputFasta" ftype="fasta" value="chr17.fa" />\n-        <param name="strainName" value="V1"/>\n-        <param name="inputStrainVCF" ftype="vcf" value="chr17.VCF"/>\n-        <output name="outputVCFStorage" ftype="tabular" file="Expchr17.tab"/>\n-    </test>\n-  </tests>\n-  <help><![CDATA[\n-    **stores info from variant calling into a table. It will create a tabulate filed with SNP infos**\n-    \n------\n-\n-**what it does :**\n-\n-VCFStorage.py is a python script that allows to store data from multiple VCF into a single tabular marker file. each VCF will be a new column on the final output.\n-\n------\n-\n-**input format :**\n-\n-Multiple files are necessary as input : \n-\n- - the fasta file of your genomic sequence\n- - multiple VCF files (1 per strain). 
It is strongly advised to use the column filter (col 7) for filtered positions instead of removing the lines from the VCF. \n-\n------\n-\n-**ouput format :**\n-\n-the result is a tab delimited format file  where all genomic positions are in rows, and all strains are in columns (in the order you gave the VCF)\n-\n-For each position and each genome, a code is attributed : \n-\n-- for the reference : ::\n-\n-    A,T,G,C for the corresponding nucleotidic acid \n-\n-- for the genomes : ::\n-\n-    U if the position was not refered in the VCF file \n-    R if the base is similar to the reference \n-    F if the base has been filtered in the column FILTER (column 7) of the VCF \n-    A,T,G,C if the genome has a validated SNP at the position\n-\n-\n------\n-\n-**example :**\n-\n-fasta input file (genomic sequence): ::\n-    \n-    >chr_17\n-    ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa\n-    TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT\n-    \n-VCF input file : ::\n-\n-    ##fileformat=VCFv4.1\n-    ##fileDate=20140725\n-    ##source=freeBayes v0.9.13-2-ga830efd\n-    ##reference=exmple.fsa\n-    ##phasing=none\n-    ##DetectedFormat=freebayes\n-    ##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">\n-    ##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">\n-    ##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)">\n-    ##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">\n-    ##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">\n-    #CHROM    POS    ID    REF    ALT    QUAL    FILTER    INFO    FORMAT    V1\n-    chr_17    17    .    
A    G    529.213    G_AF;G_DP    AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;tech'..b'=T    GT:DP:RO:QR:AO:QA:GL    1/1:37:0:0:37:1336:-10,-10,0\n-    chr_17    112    .    G    A    1276.25    G_DP    AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A    GT:DP:RO:QR:AO:QA:GL    1/1:40:0:0:40:1471:-10,-10,0\n-\n-expected result : ::\n-\n-    CHROM    POS    reference    V1\n-    chr_17    1    C    U\n-    chr_17    2    C    U\n-    chr_17    3    C    U\n-    chr_17    4    T    U\n-    chr_17    5    A    U\n-    chr_17    6    A    U\n-    chr_17    7    C    U\n-    chr_17    8    C    U\n-    chr_17    9    C    U\n-    chr_17    10    T    U\n-    chr_17    11    A    U\n-    chr_17    12    A    U\n-    chr_17    13    C    U\n-    chr_17    14    C    U\n-    chr_17    15    C    U\n-    chr_17    16    T    U\n-    chr_17    17    A    F\n-    chr_17    18    A    U\n-    chr_17    19    C    U\n-    chr_17    20    C    U\n-    chr_17    21    C    U\n-    chr_17    22    T    U\n-    chr_17    23    A    U\n-    chr_17    24    A    U\n-    chr_17    25    C    U\n-    chr_17    26    C    U\n-    chr_17    27    C    U\n-    chr_17    28    T    U\n-    chr_17    29    A    U\n-    chr_17    30    A    U\n-    chr_17    31    C    U\n-    chr_17    32    C    U\n-    chr_17    33    C    U\n-    chr_17    34    T    U\n-    chr_17    35    A    U\n-    chr_17    36    A    U\n-    chr_17    37    C    
G\n-    chr_17    38    C    U\n-    chr_17    39    C    U\n-    chr_17    40    T    F\n-    chr_17    41    A    U\n-    chr_17    42    A    U\n-    chr_17    43    C    U\n-    chr_17    44    C    U\n-    chr_17    45    C    U\n-    chr_17    46    T    U\n-    chr_17    47    A    U\n-    chr_17    48    A    U\n-    chr_17    49    C    U\n-    chr_17    50    C    U\n-    chr_17    51    C    U\n-    chr_17    52    T    U\n-    chr_17    53    A    U\n-    chr_17    54    A    U\n-    chr_17    55    C    U\n-    chr_17    56    C    U\n-    chr_17    57    C    U\n-    chr_17    58    T    U\n-    chr_17    59    A    U\n-    chr_17    60    A    R\n-    chr_17    61    T    U\n-    chr_17    62    A    U\n-    chr_17    63    C    U\n-    chr_17    64    G    U\n-    chr_17    65    C    U\n-    chr_17    66    G    U\n-    chr_17    67    C    U\n-    chr_17    68    G    U\n-    chr_17    69    C    U\n-    chr_17    70    G    U\n-    chr_17    71    C    U\n-    chr_17    72    C    U\n-    chr_17    73    T    R\n-    chr_17    74    A    U\n-    chr_17    75    A    U\n-    chr_17    76    C    U\n-    chr_17    77    C    U\n-    chr_17    78    C    U\n-    chr_17    79    T    U\n-    chr_17    80    A    U\n-    chr_17    81    C    T\n-    chr_17    82    G    U\n-    chr_17    83    A    U\n-    chr_17    84    C    U\n-    chr_17    85    T    U\n-    chr_17    86    T    U\n-    chr_17    87    T    U\n-    chr_17    88    A    U\n-    chr_17    89    A    U\n-    chr_17    90    C    U\n-    chr_17    91    C    U\n-    chr_17    92    T    U\n-    chr_17    93    A    U\n-    chr_17    94    C    U\n-    chr_17    95    T    U\n-    chr_17    96    C    U\n-    chr_17    97    T    U\n-    chr_17    98    A    U\n-    chr_17    99    A    U\n-    chr_17    100    A    U\n-    chr_17    101    C    U\n-    chr_17    102    T    U\n-    chr_17    103    C    U\n-    chr_17    104    T    U\n-    chr_17    105    C    F\n-    chr_17    106 
   C    U\n-    chr_17    107    T    U\n-    chr_17    108    A    U\n-    chr_17    109    C    U\n-    chr_17    110    T    U\n-    chr_17    111    A    U\n-    chr_17    112    G    F\n-    chr_17    113    T    U\n-    chr_17    114    A    U\n-    chr_17    115    C    U\n-    chr_17    116    G    U\n-    chr_17    117    T    U\n-    chr_17    118    C    U\n-    chr_17    119    T    U\n-    chr_17    120    T    U\n-\n------\n-\n-**reference :**\n-\n-]]>\n-  </help>\n-</tool>\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/Expchr17.tab
--- a/test-data/Expchr17.tab Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,121 +0,0 @@
-CHROM POS reference V1
-chr_17 1 C U
-chr_17 2 C U
-chr_17 3 C U
-chr_17 4 T U
-chr_17 5 A U
-chr_17 6 A U
-chr_17 7 C U
-chr_17 8 C U
-chr_17 9 C U
-chr_17 10 T U
-chr_17 11 A U
-chr_17 12 A U
-chr_17 13 C U
-chr_17 14 C U
-chr_17 15 C U
-chr_17 16 T U
-chr_17 17 A F
-chr_17 18 A U
-chr_17 19 C U
-chr_17 20 C U
-chr_17 21 C U
-chr_17 22 T U
-chr_17 23 A U
-chr_17 24 A U
-chr_17 25 C U
-chr_17 26 C U
-chr_17 27 C U
-chr_17 28 T U
-chr_17 29 A U
-chr_17 30 A U
-chr_17 31 C U
-chr_17 32 C U
-chr_17 33 C U
-chr_17 34 T U
-chr_17 35 A U
-chr_17 36 A U
-chr_17 37 C G
-chr_17 38 C U
-chr_17 39 C U
-chr_17 40 T F
-chr_17 41 A U
-chr_17 42 A U
-chr_17 43 C U
-chr_17 44 C U
-chr_17 45 C U
-chr_17 46 T U
-chr_17 47 A U
-chr_17 48 A U
-chr_17 49 C U
-chr_17 50 C U
-chr_17 51 C U
-chr_17 52 T U
-chr_17 53 A U
-chr_17 54 A U
-chr_17 55 C U
-chr_17 56 C U
-chr_17 57 C U
-chr_17 58 T U
-chr_17 59 A U
-chr_17 60 A R
-chr_17 61 T U
-chr_17 62 A U
-chr_17 63 C U
-chr_17 64 G U
-chr_17 65 C U
-chr_17 66 G U
-chr_17 67 C U
-chr_17 68 G U
-chr_17 69 C U
-chr_17 70 G U
-chr_17 71 C U
-chr_17 72 C U
-chr_17 73 T R
-chr_17 74 A U
-chr_17 75 A U
-chr_17 76 C U
-chr_17 77 C U
-chr_17 78 C U
-chr_17 79 T U
-chr_17 80 A U
-chr_17 81 C T
-chr_17 82 G U
-chr_17 83 A U
-chr_17 84 C U
-chr_17 85 T U
-chr_17 86 T U
-chr_17 87 T U
-chr_17 88 A U
-chr_17 89 A U
-chr_17 90 C U
-chr_17 91 C U
-chr_17 92 T U
-chr_17 93 A U
-chr_17 94 C U
-chr_17 95 T U
-chr_17 96 C U
-chr_17 97 T U
-chr_17 98 A U
-chr_17 99 A U
-chr_17 100 A U
-chr_17 101 C U
-chr_17 102 T U
-chr_17 103 C U
-chr_17 104 T U
-chr_17 105 C F
-chr_17 106 C U
-chr_17 107 T U
-chr_17 108 A U
-chr_17 109 C U
-chr_17 110 T U
-chr_17 111 A U
-chr_17 112 G F
-chr_17 113 T U
-chr_17 114 A U
-chr_17 115 C U
-chr_17 116 G U
-chr_17 117 T U
-chr_17 118 C U
-chr_17 119 T U
-chr_17 120 T U
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFCarto_input.tab
--- a/test-data/VCFCarto_input.tab Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,76 +0,0 @@
-CHROM POS reference REF1 G01 REF2 G02 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12
-Chr1 1 A R R R R U R R R R R R R R R
-Chr1 2 T R R R R R U R R R R R R R R
-Chr1 3 G R R R R R R R R R R R R R R
-Chr1 4 G R R R R R R R R R R R R F R
-Chr1 5 G R R R R R R U F R R R R R R
-Chr1 6 C R R R R R R R R R R R R R U
-Chr1 7 A G C C C F C C C C C G C G G
-Chr1 8 G R R R R R R R R R R R R R R
-Chr1 9 C R T T R T T T U R T R T T T
-Chr1 10 T R R R R R R R R R R R R R U
-Chr1 11 T R R R R R R R R R R R F R R
-Chr1 12 A R R R R U R R R R F R R R R
-Chr1 13 A R R G G R F R F G R G R R F
-Chr1 14 A R R R R R R R R F R R R R R
-Chr1 15 G R R R U R F R R R R R R U U
-Chr1 16 G A R R A R R U F R R A A R A
-Chr1 17 A R G G R U R R G G R G U R G
-Chr1 18 C R R R R R U R R R R R R R R
-Chr1 19 G C U R C R C U R R C C C R C
-Chr1 20 G A U R A R A U R R A A A R A
-Chr1 21 G T U R T R T U R R T T T R T
-Chr1 22 A T U R T R T U R R T T T R T
-Chr1 23 C T T R T R R R T R U T R T T
-Chr1 24 T R R R R R U R R R R R R R F
-Chr1 25 G R F R R R R R U R F R R R R
-Chr1 26 T R R C C C C C R R C R C R U
-Chr1 27 C R R G G G G R G R G R G R R
-Chr1 28 C G T T T G G T T F T G T T G
-Chr1 29 G T R R R R T R T R T T R T R
-Chr1 30 T R R R R R R R R R R R R R R
-Chr1 31 A R R R R F R R R R F R R R R
-Chr1 32 A G G R G G G R R G G G G G R
-Chr1 33 G R R R R R R R R R R R R R R
-Chr1 34 C R R R R R R R R R R R R R R
-Chr1 35 C R R R R R F R R R R R R R U
-Chr2 1 T R R R F R R R R R R R R R R
-Chr2 2 A C R R C C U R R R R C C C U
-Chr2 3 C R R R R R R U R R R R R R R
-Chr2 4 C R R R R R R R U R R R R F R
-Chr2 5 T R R R R R R R R R R R R R R
-Chr2 6 C R R R R R R R R R R R R R R
-Chr2 7 A T F R U R T T T R T T F T T
-Chr2 8 T R R R R R R R R R R R R R R
-Chr2 9 C R R R R R R R R R R R R R R
-Chr2 10 G R T T T T R T R R R R R U R
-Chr2 11 C R A A A A R A R R R R R U R
-Chr2 12 A R T T T T R T R R R R R U R
-Chr2 13 T R C C C C R C R R R R R U R
-Chr2 14 C T A A T A T A T A T T A A A
-Chr2 15 T R R R F R R R R R R R R R R
-Chr2 16 A R R R R R R R U R R R R R R
-Chr2 17 A R U R R R R R R R R R R R F
-Chr2 18 G R R R R R R R R R R R R R R
-Chr2 19 A R R R R R R F R R R R R R R
-Chr2 20 C R R R R R R R F R R R R R R
-Chr2 21 G A R R A A A R R R A A R R R
-Chr2 22 A R R R R R R F R R R R R R R
-Chr2 23 A R R T T R R T T T T T R R R
-Chr2 24 T R R R R R R U R R R R R R F
-Chr2 25 T R A A R R A R A R R A R R A
-Chr2 26 G R R R R R R R R R R R R R R
-Chr2 27 A R R R R R R R R R R R R U R
-Chr2 28 C R U R R F F R R F R F U R R
-Chr2 29 G R R R R R R F R R R R R R R
-Chr2 30 T A A G A G G A A G F G G G U
-Chr2 31 A R R R R R R R R U U R R R R
-Chr2 32 G R R R R R R U U R R R R R R
-Chr2 33 G R U R R R R U R R R R R R R
-Chr2 34 A R R R U R R R R R R R R R R
-Chr2 35 G R R R R R R R R R R R R R R
-Chr2 36 T R R R R R R U R R R R R R R
-Chr3 1 T U R R R R R U R R R R R R R
-Chr3 2 T R R U R R R U R R R R R R R
-Chr3 3 T F R R R R R U R R R R R R R
-Chr3 4 T R R F R R R U R R R R R R R
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFCarto_output.tab
--- a/test-data/VCFCarto_output.tab Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,27 +0,0 @@
-CHROM POS reference REF1 G01 REF2 G02 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12
-Chr1 7 A G C C C F C C C C C G C G G
-Chr1 9 C R T T R T T T U R T R T T T
-Chr1 13 A R R G G R F R F G R G R R F
-Chr1 16 G A R R A R R U F R R A A R A
-Chr1 17 A R G G R U R R G G R G U R G
-Chr1 19 G C U R C R C U R R C C C R C
-Chr1 20 G A U R A R A U R R A A A R A
-Chr1 21 G T U R T R T U R R T T T R T
-Chr1 22 A T U R T R T U R R T T T R T
-Chr1 23 C T T R T R R R T R U T R T T
-Chr1 26 T R R C C C C C R R C R C R U
-Chr1 27 C R R G G G G R G R G R G R R
-Chr1 28 C G T T T G G T T F T G T T G
-Chr1 29 G T R R R R T R T R T T R T R
-Chr1 32 A G G R G G G R R G G G G G R
-Chr2 2 A C R R C C U R R R R C C C U
-Chr2 7 A T F R U R T T T R T T F T T
-Chr2 10 G R T T T T R T R R R R R U R
-Chr2 11 C R A A A A R A R R R R R U R
-Chr2 12 A R T T T T R T R R R R R U R
-Chr2 13 T R C C C C R C R R R R R U R
-Chr2 14 C T A A T A T A T A T T A A A
-Chr2 21 G A R R A A A R R R A A R R R
-Chr2 23 A R R T T R R T T T T T R R R
-Chr2 25 T R A A R R A R A R R A R R A
-Chr2 30 T A A G A G G A A G F G G G U
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFCarto_output_merged.bed
--- a/test-data/VCFCarto_output_merged.bed Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,20 +0,0 @@
-Chr1 7 7 *M_00001
-Chr1 9 9 *M_00002
-Chr1 13 13 *M_00003
-Chr1 16 16 *M_00004
-Chr1 17 17 *M_00005
-Chr1 19 22 *M_00006
-Chr1 23 23 *M_00007
-Chr1 26 26 *M_00008
-Chr1 27 27 *M_00009
-Chr1 28 28 *M_00010
-Chr1 29 29 *M_00011
-Chr1 32 32 *M_00012
-Chr2 2 2 *M_00013
-Chr2 7 7 *M_00014
-Chr2 10 13 *M_00015
-Chr2 14 14 *M_00016
-Chr2 21 21 *M_00017
-Chr2 23 23 *M_00018
-Chr2 25 25 *M_00019
-Chr2 30 30 *M_00020
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFCarto_output_merged.tab
--- a/test-data/VCFCarto_output_merged.tab Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-CHROM POS reference REF1 G01 REF2 G02 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12
-Chr1 *M_00001 - A H H H - H H H H H A H A A
-Chr1 *M_00002 - A H H A H H H - A H A H H H
-Chr1 *M_00003 - A A H H A - A - H A H A A -
-Chr1 *M_00004 - A H H A H H - - H H A A H A
-Chr1 *M_00005 - A H H A - A A H H A H - A H
-Chr1 *M_00006 - A - H A H A - H H A A A H A
-Chr1 *M_00007 - A A H A H H H A H - A H A A
-Chr1 *M_00008 - A A H H H H H A A H A H A -
-Chr1 *M_00009 - A A H H H H A H A H A H A A
-Chr1 *M_00010 - A H H H A A H H - H A H H A
-Chr1 *M_00011 - A H H H H A H A H A A H A H
-Chr1 *M_00012 - A A H A A A H H A A A A A H
-Chr2 *M_00013 - A H H A A - H H H H A A A -
-Chr2 *M_00014 - A - H - H A A A H A A - A A
-Chr2 *M_00015 - A H H H H A H A A A A A - A
-Chr2 *M_00016 - A H H A H A H A H A A H H H
-Chr2 *M_00017 - A H H A A A H H H A A H H H
-Chr2 *M_00018 - A A H H A A H H H H H A A A
-Chr2 *M_00019 - A H H A A H A H A A H A A H
-Chr2 *M_00020 - A A H A H H A A H - H H H -
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFFiltering_DP_4_200_output.vcf
--- a/test-data/VCFFiltering_DP_4_200_output.vcf Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,110 +0,0 @@\n-##fileformat=VCFv4.1\n-##fileDate=20140725\n-##source=freeBayes v0.9.13-2-ga830efd\n-##phasing=none\n-##DetectedFormat=freebayes\n-##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">\n-##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">\n-##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(4 - 200)">\n-##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">\n-##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">\n-##INFO=<ID=G_AN,Number=1,Type=Integer,Description="Total number of alleles calculated by VCFFiltering">\n-##INFO=<ID=G_AF,Number=1,Type=Float,Description="frequency of the most supported alternative allele calculated by VCFFiltering">\n-##INFO=<ID=G_base,Number=1,Type=String,Description="base of the most supported alternative allele found by VCFFiltering">\n-##INFO=<ID=G_DP,Number=1,Type=Integer,Description="total depth calculated by VCFFiltering">\n-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\n-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\n-##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\n-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n-##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\n-##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">\n-##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">\n-##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation 
count, with partial observations recorded fractionally">\n-##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\n-##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\n-##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\n-##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\n-##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\n-##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">\n-##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\n-##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\n-##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\n-##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n-##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the 
deviation between ABR and ABA given E(ABR/ABA) ~ 0'..b'=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.834078;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=35;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=G\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:35:2:76:-5.08162,0,-2.36476\n-chr_1\t4678\t.\tG\tA\t21.951\tG_AF;G_DP\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.136458;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=41;RO=1;RPP=3.0103;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t0/1:3:1:41:2:68:-4.94794,0,-2.78626\n-chr_1\t4686\t.\tC\tT\t81.6957\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=3;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.73412;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=37.6667;MQMR=0;NS=1;NUMALT=1;ODDS=7.23731;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=110;QR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAF=2;SAP=3.73412;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:0:0:3:110:-9.44717,-0.842096,0\n-chr_1\t4699\t.\tG\tA\t103.379\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=4;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=36.75;MQMR=0;NS=1;NUMALT=1;ODDS=8.55324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=140;QR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAF=3;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=4;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:4:0:0:4:140:-10,-1.11256,0\n-chr_1\t4717\t.\tC\tT\t18.6085\tG_AF;G_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=34;NS=1;NUMALT=1;ODDS=1.12012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=40;RO=1;RPP=7.35
324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:40:2:66:-4.87583,0,-2.24054\n-chr_1\t4727\t.\tG\tA\t43.4619\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=32;MQMR=0;NS=1;NUMALT=1;ODDS=5.91984;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:66:-5.54913,-0.570955,0\n-chr_1\t4747\t.\tC\tT\t40.3421\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=7.35324;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=34;MQMR=0;NS=1;NUMALT=1;ODDS=5.92045;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=0;RO=0;RPP=7.35324;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:68:-5.83919,-0.571218,0\n-chr_1\t3305168\t.\tT\tA,G\t280.615\tG_AF\tAB=0.75,0.25;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n-chr_1\t3305169\t.\tT\tA,G\t280.615\tG_AF\tAB=0.25,0.75;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;
technology.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFFiltering_DPauto_output.vcf
--- a/test-data/VCFFiltering_DPauto_output.vcf Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,110 +0,0 @@\n-##fileformat=VCFv4.1\n-##fileDate=20140725\n-##source=freeBayes v0.9.13-2-ga830efd\n-##phasing=none\n-##DetectedFormat=freebayes\n-##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">\n-##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">\n-##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(3 - 5)">\n-##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">\n-##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">\n-##INFO=<ID=G_AN,Number=1,Type=Integer,Description="Total number of alleles calculated by VCFFiltering">\n-##INFO=<ID=G_AF,Number=1,Type=Float,Description="frequency of the most supported alternative allele calculated by VCFFiltering">\n-##INFO=<ID=G_base,Number=1,Type=String,Description="base of the most supported alternative allele found by VCFFiltering">\n-##INFO=<ID=G_DP,Number=1,Type=Integer,Description="total depth calculated by VCFFiltering">\n-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\n-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\n-##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\n-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n-##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\n-##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">\n-##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">\n-##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, 
with partial observations recorded fractionally">\n-##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\n-##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\n-##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\n-##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\n-##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\n-##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">\n-##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\n-##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\n-##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\n-##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n-##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation 
between ABR and ABA given E(ABR/ABA) ~ 0.5'..b'DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.834078;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=35;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=G\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:35:2:76:-5.08162,0,-2.36476\n-chr_1\t4678\t.\tG\tA\t21.951\tG_AF\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.136458;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=41;RO=1;RPP=3.0103;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t0/1:3:1:41:2:68:-4.94794,0,-2.78626\n-chr_1\t4686\t.\tC\tT\t81.6957\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=3;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.73412;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=37.6667;MQMR=0;NS=1;NUMALT=1;ODDS=7.23731;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=110;QR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAF=2;SAP=3.73412;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:0:0:3:110:-9.44717,-0.842096,0\n-chr_1\t4699\t.\tG\tA\t103.379\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=4;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=36.75;MQMR=0;NS=1;NUMALT=1;ODDS=8.55324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=140;QR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAF=3;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=4;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:4:0:0:4:140:-10,-1.11256,0\n-chr_1\t4717\t.\tC\tT\t18.6085\tG_AF\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=34;NS=1;NUMALT=1;ODDS=1.12012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=40;RO=1;RPP=7.35324;RPPR=5.18177;R
UN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.67;G_DP=3;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:40:2:66:-4.87583,0,-2.24054\n-chr_1\t4727\t.\tG\tA\t43.4619\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=32;MQMR=0;NS=1;NUMALT=1;ODDS=5.91984;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:66:-5.54913,-0.570955,0\n-chr_1\t4747\t.\tC\tT\t40.3421\tG_DP\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=7.35324;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=34;MQMR=0;NS=1;NUMALT=1;ODDS=5.92045;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=0;RO=0;RPP=7.35324;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=2;G_Base=T\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:68:-5.83919,-0.571218,0\n-chr_1\t3305168\t.\tT\tA,G\t280.615\tG_AF;G_DP\tAB=0.75,0.25;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n-chr_1\t3305169\t.\tT\tA,G\t280.615\tG_AF;G_DP\tAB=0.25,0.75;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technolo
gy.illumina=1,1;G_AN=2;G_AF=0.75;G_DP=12;G_Base=A\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/VCFFiltering_input.vcf
--- a/test-data/VCFFiltering_input.vcf Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,100 +0,0 @@\n-##fileformat=VCFv4.1\n-##fileDate=20140725\n-##source=freeBayes v0.9.13-2-ga830efd\n-##phasing=none\n-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\n-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\n-##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\n-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n-##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\n-##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">\n-##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">\n-##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">\n-##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\n-##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\n-##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\n-##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\n-##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\n-##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">\n-##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\n-##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward 
strand">\n-##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\n-##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n-##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">\n-##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\n-##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between 
EL and ER given E(EL/ER) ~ 0.5, derived using '..b';MQM=41.5;MQMR=0;NS=1;NUMALT=1;ODDS=5.92133;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=69;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:69:-6.40152,-0.5716,0\n-chr_1\t4661\t.\tA\tG\t19.7502\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.834078;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=35;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:35:2:76:-5.08162,0,-2.36476\n-chr_1\t4678\t.\tG\tA\t21.951\t.\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.0103;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=43;NS=1;NUMALT=1;ODDS=0.136458;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=41;RO=1;RPP=3.0103;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t0/1:3:1:41:2:68:-4.94794,0,-2.78626\n-chr_1\t4686\t.\tC\tT\t81.6957\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=3;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=3.73412;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=37.6667;MQMR=0;NS=1;NUMALT=1;ODDS=7.23731;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=110;QR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAF=2;SAP=3.73412;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:0:0:3:110:-9.44717,-0.842096,0\n-chr_1\t4699\t.\tG\tA\t103.379\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=4;CIGAR=1X;DP=4;DPB=4;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=36.75;MQMR=0;NS=1;NUMALT=1;ODDS=8.55324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=140;QR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAF=3;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:4:0:0:4:140:-10,-1.11256,0\n-chr_1\t4717\t.\tC\tT\t18.6085\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2
;CIGAR=1X;DP=3;DPB=3;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=35;MQMR=34;NS=1;NUMALT=1;ODDS=1.12012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=40;RO=1;RPP=7.35324;RPPR=5.18177;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:3:1:40:2:66:-4.87583,0,-2.24054\n-chr_1\t4727\t.\tG\tA\t43.4619\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=32;MQMR=0;NS=1;NUMALT=1;ODDS=5.91984;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66;QR=0;RO=0;RPP=3.0103;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:66:-5.54913,-0.570955,0\n-chr_1\t4747\t.\tC\tT\t40.3421\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=7.35324;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=34;MQMR=0;NS=1;NUMALT=1;ODDS=5.92045;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=68;QR=0;RO=0;RPP=7.35324;RPPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1\tGT:DP:RO:QR:AO:QA:GL\t1/1:2:0:0:2:68:-5.83919,-0.571218,0\n-chr_1\t3305168\t.\tT\tA,G\t280.615\t.\tAB=0.75,0.25;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n-chr_1\t3305169\t.\tT\tA,G\t280.615\t.\tAB=0.25,0.75;ABP=9.52472,9.52472;AC=1,1;AF=0.5,0.5;AN=2;AO=9,3;CIGAR=1X,1X;DP=12;DPB=12;DPRA=0,0;EPP=9.04217,3.73412;EPPR=0;GTI=0;LEN=1,1;MEANALT=2,2;MQM=60,60;MQMR=0;NS=1;NUMALT=2;ODDS=4.632;PAIRED=1,1;PAIREDR=0;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=319,107;QR=0;RO=0;RPP=9.04217,9.52472;RPPR=0;RUN=1,1;SAF=4,2;SAP=3.25157,3.73412
;SAR=5,1;SRF=0;SRP=0;SRR=0;TYPE=snp,snp;technology.illumina=1,1\tGT:DP:RO:QR:AO:QA:GL\t1/2:12:0:0:9,3:319,107:-10,-8.89435,-6.43456,-10,0,-10\n'
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/chr17.VCF
--- a/test-data/chr17.VCF Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-##fileformat=VCFv4.1
-##fileDate=20140725
-##source=freeBayes v0.9.13-2-ga830efd
-##reference=exmple.fsa
-##phasing=none
-##DetectedFormat=freebayes
-##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">
-##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">
-##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)">
-##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">
-##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT V1
-chr_17 17 . A G 529.213 G_AF;G_DP AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.58;G_DP=36;G_Base=G GT:DP:RO:QR:AO:QA:GL 0/1:36:15:535:21:751:-10,0,-10
-chr_17 37 . C G 1082.38 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=34;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=3.26577;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=48.0391;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1243;QR=0;RO=0;RPP=15.5282;RPPR=0;RUN=1;SAF=18;SAP=3.26577;SAR=16;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=34;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:34:0:0:34:1243:-10,-9.23017,0
-chr_17 40 . T T 825.518 G_AF AB=0;ABP=0;AC=2;AF=1;AN=2;AO=29;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=6.67934;EPPR=13.8677;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=8.92992;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1082;QR=178;RO=5;RPP=9.07545;RPPR=13.8677;RUN=1;SAF=13;SAP=3.68421;SAR=16;SRF=5;SRP=13.8677;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.85;G_DP=34;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:34:5:178:29:1082:-10,0,-6.82575
-chr_17 60 . A . 699.741 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=22;CIGAR=1X;DP=22;DPB=22;DPRA=0;EPP=17.2236;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=32.2544;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=823;QR=0;RO=0;RPP=9.32731;RPPR=0;RUN=1;SAF=12;SAP=3.40511;SAR=10;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=22;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:22:0:0:22:823:-10,-5.98732,0
-chr_17 73 . T . 846.299 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=27;CIGAR=1X;DP=27;DPB=27;DPRA=0;EPP=16.6021;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=38.84;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1002;QR=0;RO=0;RPP=5.02092;RPPR=0;RUN=1;SAF=21;SAP=21.1059;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=27;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:27:0:0:27:1002:-10,-7.34226,0
-chr_17 81 . C T 764.464 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=25;CIGAR=1X;DP=25;DPB=25;DPRA=0;EPP=13.5202;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=36.1324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=902;QR=0;RO=0;RPP=3.79203;RPPR=0;RUN=1;SAF=19;SAP=17.6895;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=25;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:25:0:0:25:902:-10,-6.76842,0
-chr_17 105 . C T 1154 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=37;CIGAR=1X;DP=37;DPB=37;DPRA=0;EPP=5.88603;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=52.0047;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1336;QR=0;RO=0;RPP=19.9713;RPPR=0;RUN=1;SAF=23;SAP=7.76406;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=37;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:37:0:0:37:1336:-10,-10,0
-chr_17 112 . G A 1276.25 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A GT:DP:RO:QR:AO:QA:GL 1/1:40:0:0:40:1471:-10,-10,0
-
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d test-data/chr17.fa
--- a/test-data/chr17.fa Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
->chr_17
-ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa
-TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT
b
diff -r cfd4eaadad42 -r 6bebeb76fa8d tool_dependencies.xml
--- a/tool_dependencies.xml Tue Dec 15 05:36:12 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
- <package name="VCF_Gandalf_Tools" version="1.0">
- <install version="1.0">
- <actions>
- <action type="download_by_url">https://urgi.versailles.inra.fr/download/gandalf/VCFtools-1.2.tar.gz</action>
- <action type="shell_command">python setup_VCFtools.py install</action>
- <action type="move_directory_files">
- <source_directory>.</source_directory>
- <destination_directory>$INSTALL_DIR</destination_directory>
- </action>
- <action type="set_environment">
- <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR</environment_variable>
- <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
- </action>
- </actions>
- </install>
- </package>
-</tool_dependency>