Previous changeset 0:0c5cc5763091 (2015-11-05) |
Commit message:
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/galaxy-tool-shed-tools commit bd543e68c1af82bcd6a04f0ae3d1180e8887e122 |
modified:
test-data/example.vcf varscan_mpileup2snp.xml varscan_mpileup2snp_from_bam.xml |
added:
test-data/generate_reads.py.bak test-data/hg19_mutant.2.vcf |
removed:
test-data/example.fa.fai test-data/generate_reads.py tool_dependencies.xml |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 test-data/example.fa.fai --- a/test-data/example.fa.fai Thu Nov 05 09:59:46 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -chr1 600 6 60 61 |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 test-data/example.vcf --- a/test-data/example.vcf Thu Nov 05 09:59:46 2015 -0500 +++ b/test-data/example.vcf Wed Feb 15 16:16:01 2017 -0500 |
b |
@@ -22,10 +22,10 @@ ##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)"> ##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)"> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 24 . C G . PASS ADP=41;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:236:41:41:0:41:100%:2.3541E-24:0:84:0:0:22:19 -chr1 84 . G A . PASS ADP=59;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:59:59:0:59:100%:4.1056E-35:0:83:0:0:33:26 -chr1 146 . T C . PASS ADP=81;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:81:81:0:81:100%:2.7329E-48:0:86:0:0:49:32 -chr1 206 . A G . PASS ADP=79;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:79:79:0:79:100%:4.3185E-47:0:86:0:0:48:31 -chr1 495 . T G . PASS ADP=65;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:65:65:0:65:100%:1.0519E-38:0:89:0:0:31:34 -chr1 496 . A C . PASS ADP=64;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:64:64:0:64:100%:4.1752E-38:0:89:0:0:31:33 -chr1 497 . G C . PASS ADP=63;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:63:63:0:63:100%:1.657E-37:0:89:0:0:30:33 \ No newline at end of file +chr1 24 . C G . PASS ADP=50;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:50:50:0:50:100%:9.9117E-30:0:93:0:0:27:23 +chr1 84 . G A . PASS ADP=70;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:70:70:0:70:100%:1.0659E-41:0:93:0:0:38:32 +chr1 146 . T C . PASS ADP=85;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:85:85:0:85:100%:1.0935E-50:0:93:0:0:52:33 +chr1 206 . A G . PASS ADP=91;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:91:91:0:91:100%:2.7621E-54:0:93:0:0:51:40 +chr1 495 . T G . PASS ADP=75;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:75:75:0:75:100%:1.0773E-44:0:93:0:0:37:38 +chr1 496 . A C . PASS ADP=76;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:76:76:0:76:100%:2.711E-45:0:93:0:0:38:38 +chr1 497 . G C . PASS ADP=77;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:77:77:0:77:100%:6.8219E-46:0:93:0:0:38:39 |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 test-data/generate_reads.py --- a/test-data/generate_reads.py Thu Nov 05 09:59:46 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,129 +0,0 @@ -#!/usr/bin/env python - - -import random -import math - - -__version_info__ = ('1', '0', '0') -__version__ = '.'.join(__version_info__) - - -class Region: - def __init__(self,start,stop,sequence): - self.start = start - self.stop = stop - self.sequence = sequence.strip().replace("\n","").replace(" ","") - if(len(self.sequence) != self.getSpanningLength()): - print "ERROR: sequence length: "+str(len(self.sequence))+", while spanning region is: "+str(self.getSpanningLength()) - import sys - sys.exit() - - def getSpanningLength(self): - return abs(self.stop-self.start+1) - -class ReadSynthesizer: - def __init__(self,chromosome): - self.regions = [] - self.chromosome = chromosome - - def addRegion(self,region): - self.regions.append(region) - - def produceReads(self,readDensity = 1,read_length = 50): - """ - Produces uniform reads by walking iteratively over self.regions - """ - - mRNA = self.getTotalmRNA() - spanning_length = self.getRegionSpanningLength() - n = spanning_length['total'] - read_length + 1 - - j = 0 - k = 0 - - for i in range(n): - # "alpha is playing the role of k and beta is playing the role of theta" - dd = max(0,int(round(random.lognormvariate(math.log(readDensity),0.5))))# Notice this is NOT a binomial distribution!! - - for d in range(dd): - sequence = mRNA[i:i+read_length] - - if(random.randint(0,1) == 0): - strand = 0 - else: - strand = 16 - flag = strand + 0 - - print "read_"+str(j)+"."+str(i)+"."+str(d)+"\t"+str(flag)+"\t"+self.chromosome+"\t"+str(self.regions[j].start + k)+"\t60\t"+self.getMappingString(read_length,j,k)+"\t*\t0\t0\t"+str(sequence.upper())+"\t*" - - spanning_length['iter'][j] -= 1 - if(k >= self.regions[j].getSpanningLength()-1): - j += 1 - k = 0 - else: - k += 1 - - def getMappingString(self,length,j,offset): - m = 0 - - out = "" - - for i in range(length): - k = i + offset - - if(k >= self.regions[j].getSpanningLength()): - j += 1 - - out += str(m)+"M" - out += (str(self.regions[j].start - self.regions[j-1].stop-1))+"N" - m = 1 - - offset = -k - else: - m += 1 - - out += str(m) + "M" - - - return out - - def getRegionSpanningLength(self): - length = {'total':0,'iter':[]} - for r in self.regions: - l = r.getSpanningLength() - length['iter'].append(l) - length['total'] += l - return length - - def getTotalmRNA(self): - mRNA = "" - for r in self.regions: - mRNA += r.sequence - return mRNA - - - -if __name__ == "__main__": - # Real world example snp - - #rs = ReadSynthesizer('chr6') - #rs.addRegion(Region(154360546,154360969,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaaggaagcggctgaggcgcttggaacccgaaaagtctcggtgctcctggctacctcgcacagcggtgcccgcccggccgtcagtaccatggacagcagcgctgcccccacgaacgccagcaattgcactgatgccttggcgtactcaagttgctccccagcacccagccccggttcctgggtcaacttgtcccacttagatggcGacctgtccgacccatgcggtccgaaccgcaccgacctgggcgggagagacagcctgtgccctccgaccggcagtccctccatgatcacggccatcacgatcatggccctctactccatcgtgtgcgtggtggggctcttcggaaacttcctggtcatgtatgtgattgtcag')) - #rs.addRegion(Region(154410961,154411313,'atacaccaagatgaagactgccaccaacatctacattttcaaccttgctctggcagatgccttagccaccagtaccctgcccttccagagtgtgaattacctaatgggaacatggccatttggaaccatcctttgcaagatagtgatctccatagattactataacatgttcaccagcatattcaccctctgcaccatgagtgttgatcgatacattgcagtctgccaccctgtcaaggccttagatttccgtactccccgaaatgccaaaattatcaatgtctgcaactggatcctctcttcagccattggtcttcctgtaatgttcatggctacaacaaaatacaggcaag')) - #rs.addRegion(Region(154412087,154412607,'gttccatagattgtacactaacattctctcatccaacctggtactgggaaaacctgctgaagatctgtgttttcatcttcgccttcattatgccagtgctcatcattaccgtgtgctatggactgatgatcttgcgcctcaagagtgtccgcatgctctctggctccaaagaaaaggacaggaatcttcgaaggatcaccaggatggtgctggtggtggtggctgtgttcatcgtctgctggactcccattcacatttacgtcatcattaaagccttggttacaatcccagaaactacgttccagactgtttcttggcacttctgcattgctctaggttacacaaacagctgcctcaacccagtcctttatgcatttctggatgaaaacttcaaacgatgcttcagagagttctgtatcccaacctcttccaacattgagcaacaaaactccactcgaattcgtcagaacactagagaccacccctccacggccaatacagtggatagaactaatcatcag')) - #rs.addRegion(Region(154428600,154428787,'gtggaattgaacctggactgtcactgtgaaaatgcaaagccttggccactgagctacaatgcagggcagtctccatttcccttcccaggaagagtctagagcattaattttgagtttgcaaaggcttgtaactatttcatatgatttttagagctgactatgacatgaaccctaaaattcctgttccc')) - #rs.produceReads(3,50) - - # Artificial SNP - rs = ReadSynthesizer('chr1') - rs.addRegion(Region( 0+1, 59+1,'aaataggtcccaaacgttacgca'+'G'+'tctatgcctgacaaagttgcgaccacttcctctgcc'))#c -> G - rs.addRegion(Region( 60+1,119+1,'ttgtgtgacacgccggagatagg'+'A'+'catcagcaagtacgttaagtacactgaacgaactgg'))#g -> A - rs.addRegion(Region(120+1,179+1,'aggtttctacatcgtgcgtgatggc'+'C'+'ctaggagaagtgggtgtatctgcacagcataagt'))#t -> C - rs.addRegion(Region(180+1,239+1,'tataagacggaagtaaagcgtcttc'+'G'+'ccgttcagcaccccacgctcatagtcaatgctgg'))#a -> G - #rs.addRegion(Region(240+1,299+1,'ttcagcatagtcaagcgccggtggcctccaaaaagacgcactgagtagcttagctacttt')) - #rs.addRegion(Region(300+1,359+1,'gctccgcttgcggaagcactaagaggagattgaatttccaaatcccccccgatacctgtg')) - #rs.addRegion(Region(360+1,419+1,'cggtcgctacgtaagtgcgaagttctgttagatacgctccccttagtatatgggcgttaa')) - #rs.addRegion(Region(420+1,479+1,'tcggaccgtcggtactcactgcattccaggtctcatatagttcgccctagaagcctggga')) - rs.addRegion(Region(480+1,539+1,'tgaacgttgaacta'+'GCC'+'ctgatgtaaaccccgcgtgccaattccaggcgtcatgggggca'))#tag -> gcc - #rs.addRegion(Region(540+1,599+1,'acccctcgcagcctccctcttgctgttggtgcctagtatttcatgatttcgagccgacat')) - rs.produceReads(2,35) |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 test-data/generate_reads.py.bak --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/generate_reads.py.bak Wed Feb 15 16:16:01 2017 -0500 |
[ |
@@ -0,0 +1,129 @@ +#!/usr/bin/env python + + +import random +import math + + +__version_info__ = ('1', '0', '0') +__version__ = '.'.join(__version_info__) + + +class Region: + def __init__(self,start,stop,sequence): + self.start = start + self.stop = stop + self.sequence = sequence.strip().replace("\n","").replace(" ","") + if(len(self.sequence) != self.getSpanningLength()): + print "ERROR: sequence length: "+str(len(self.sequence))+", while spanning region is: "+str(self.getSpanningLength()) + import sys + sys.exit() + + def getSpanningLength(self): + return abs(self.stop-self.start+1) + +class ReadSynthesizer: + def __init__(self,chromosome): + self.regions = [] + self.chromosome = chromosome + + def addRegion(self,region): + self.regions.append(region) + + def produceReads(self,readDensity = 1,read_length = 50): + """ + Produces uniform reads by walking iteratively over self.regions + """ + + mRNA = self.getTotalmRNA() + spanning_length = self.getRegionSpanningLength() + n = spanning_length['total'] - read_length + 1 + + j = 0 + k = 0 + + for i in range(n): + # "alpha is playing the role of k and beta is playing the role of theta" + dd = max(0,int(round(random.lognormvariate(math.log(readDensity),0.5))))# Notice this is NOT a binomial distribution!! + + for d in range(dd): + sequence = mRNA[i:i+read_length] + + if(random.randint(0,1) == 0): + strand = 0 + else: + strand = 16 + flag = strand + 0 + + print "read_"+str(j)+"."+str(i)+"."+str(d)+"\t"+str(flag)+"\t"+self.chromosome+"\t"+str(self.regions[j].start + k)+"\t60\t"+self.getMappingString(read_length,j,k)+"\t*\t0\t0\t"+str(sequence.upper())+"\t*" + + spanning_length['iter'][j] -= 1 + if(k >= self.regions[j].getSpanningLength()-1): + j += 1 + k = 0 + else: + k += 1 + + def getMappingString(self,length,j,offset): + m = 0 + + out = "" + + for i in range(length): + k = i + offset + + if(k >= self.regions[j].getSpanningLength()): + j += 1 + + out += str(m)+"M" + out += (str(self.regions[j].start - self.regions[j-1].stop-1))+"N" + m = 1 + + offset = -k + else: + m += 1 + + out += str(m) + "M" + + + return out + + def getRegionSpanningLength(self): + length = {'total':0,'iter':[]} + for r in self.regions: + l = r.getSpanningLength() + length['iter'].append(l) + length['total'] += l + return length + + def getTotalmRNA(self): + mRNA = "" + for r in self.regions: + mRNA += r.sequence + return mRNA + + + +if __name__ == "__main__": + # Real world example snp + + #rs = ReadSynthesizer('chr6') + #rs.addRegion(Region(154360546,154360969,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaaggaagcggctgaggcgcttggaacccgaaaagtctcggtgctcctggctacctcgcacagcggtgcccgcccggccgtcagtaccatggacagcagcgctgcccccacgaacgccagcaattgcactgatgccttggcgtactcaagttgctccccagcacccagccccggttcctgggtcaacttgtcccacttagatggcGacctgtccgacccatgcggtccgaaccgcaccgacctgggcgggagagacagcctgtgccctccgaccggcagtccctccatgatcacggccatcacgatcatggccctctactccatcgtgtgcgtggtggggctcttcggaaacttcctggtcatgtatgtgattgtcag')) + #rs.addRegion(Region(154410961,154411313,'atacaccaagatgaagactgccaccaacatctacattttcaaccttgctctggcagatgccttagccaccagtaccctgcccttccagagtgtgaattacctaatgggaacatggccatttggaaccatcctttgcaagatagtgatctccatagattactataacatgttcaccagcatattcaccctctgcaccatgagtgttgatcgatacattgcagtctgccaccctgtcaaggccttagatttccgtactccccgaaatgccaaaattatcaatgtctgcaactggatcctctcttcagccattggtcttcctgtaatgttcatggctacaacaaaatacaggcaag')) + #rs.addRegion(Region(154412087,154412607,'gttccatagattgtacactaacattctctcatccaacctggtactgggaaaacctgctgaagatctgtgttttcatcttcgccttcattatgccagtgctcatcattaccgtgtgctatggactgatgatcttgcgcctcaagagtgtccgcatgctctctggctccaaagaaaaggacaggaatcttcgaaggatcaccaggatggtgctggtggtggtggctgtgttcatcgtctgctggactcccattcacatttacgtcatcattaaagccttggttacaatcccagaaactacgttccagactgtttcttggcacttctgcattgctctaggttacacaaacagctgcctcaacccagtcctttatgcatttctggatgaaaacttcaaacgatgcttcagagagttctgtatcccaacctcttccaacattgagcaacaaaactccactcgaattcgtcagaacactagagaccacccctccacggccaatacagtggatagaactaatcatcag')) + #rs.addRegion(Region(154428600,154428787,'gtggaattgaacctggactgtcactgtgaaaatgcaaagccttggccactgagctacaatgcagggcagtctccatttcccttcccaggaagagtctagagcattaattttgagtttgcaaaggcttgtaactatttcatatgatttttagagctgactatgacatgaaccctaaaattcctgttccc')) + #rs.produceReads(3,50) + + # Artificial SNP + rs = ReadSynthesizer('chr1') + rs.addRegion(Region( 0+1, 59+1,'aaataggtcccaaacgttacgca'+'G'+'tctatgcctgacaaagttgcgaccacttcctctgcc'))#c -> G + rs.addRegion(Region( 60+1,119+1,'ttgtgtgacacgccggagatagg'+'A'+'catcagcaagtacgttaagtacactgaacgaactgg'))#g -> A + rs.addRegion(Region(120+1,179+1,'aggtttctacatcgtgcgtgatggc'+'C'+'ctaggagaagtgggtgtatctgcacagcataagt'))#t -> C + rs.addRegion(Region(180+1,239+1,'tataagacggaagtaaagcgtcttc'+'G'+'ccgttcagcaccccacgctcatagtcaatgctgg'))#a -> G + #rs.addRegion(Region(240+1,299+1,'ttcagcatagtcaagcgccggtggcctccaaaaagacgcactgagtagcttagctacttt')) + #rs.addRegion(Region(300+1,359+1,'gctccgcttgcggaagcactaagaggagattgaatttccaaatcccccccgatacctgtg')) + #rs.addRegion(Region(360+1,419+1,'cggtcgctacgtaagtgcgaagttctgttagatacgctccccttagtatatgggcgttaa')) + #rs.addRegion(Region(420+1,479+1,'tcggaccgtcggtactcactgcattccaggtctcatatagttcgccctagaagcctggga')) + rs.addRegion(Region(480+1,539+1,'tgaacgttgaacta'+'GCC'+'ctgatgtaaaccccgcgtgccaattccaggcgtcatgggggca'))#tag -> gcc + #rs.addRegion(Region(540+1,599+1,'acccctcgcagcctccctcttgctgttggtgcctagtatttcatgatttcgagccgacat')) + rs.produceReads(2,35) |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 test-data/hg19_mutant.2.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hg19_mutant.2.vcf Wed Feb 15 16:16:01 2017 -0500 |
b |
@@ -0,0 +1,25 @@ +##fileformat=VCFv4.1 +##source=VarScan2 +##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15"> +##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)"> +##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant"> +##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant"> +##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called"> +##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> +##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15"> +##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> +##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> +##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> +##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test"> +##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)"> +##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)"> +##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)"> +##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)"> +##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)"> +##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 +chr6 154360797 . A G . PASS ADP=174;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:0:177:174:0:174:100%:9.8E-1:0:86:0:0:100:74 |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 tool_dependencies.xml --- a/tool_dependencies.xml Thu Nov 05 09:59:46 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,14 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="samtools" version="0.1.19"> - <repository changeset_revision="96aab723499f" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - - <package name="samtools_parallel_mpileup" version="0.1.19-a"> - <repository changeset_revision="7dd0c6a9be41" name="package_samtools_parallel_mpileup_0_1_19_a" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - - <package name="varscan" version="2.3.6"> - <repository changeset_revision="6f8cead3dc93" name="varscan_version_2" owner="devteam" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency> |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 varscan_mpileup2snp.xml --- a/varscan_mpileup2snp.xml Thu Nov 05 09:59:46 2015 -0500 +++ b/varscan_mpileup2snp.xml Wed Feb 15 16:16:01 2017 -0500 |
[ |
@@ -1,38 +1,35 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="varscan_mpileup2snp" name="VarScan2 Call SNPs from a mpileup file" version="2.3.6.a"> +<tool id="varscan_mpileup2snp" name="VarScan2 Call SNPs from a mpileup file" version="2.4.2.a"> <description>VarScan2 SNP/SNV detection; directly from a *.mpileup file.</description> <requirements> - <requirement type="package" version="2.3.6">varscan</requirement> + <requirement type="package" version="2.4.2">varscan</requirement> </requirements> - <version_command>java -jar $JAVA_JAR_PATH/VarScan.v2.3.6.jar 2>&1 | head -n 1</version_command> + <version_command>varscan 2>&1 | head -n 1</version_command> - <command> - cat $mpileup_input | java - -Xmx64G - -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar - mpileup2snp - - #if $extended_parameters.parameters == "extended" - --min-coverage $extended_parameters.varscan_min_coverage - --min-reads2 $extended_parameters.varscan_min_reads2 - --min-avg-qual $extended_parameters.varscan_min_avg_qual - --min-var-freq $extended_parameters.varscan_min_var_freq - --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom - --p-value $extended_parameters.varscan_p_value - $extended_parameters.varscan_strand_filter - $extended_parameters.varscan_variants - #end if - - #if $varscan_output == "vcf" or $varscan_output.value == "vcf" - --output-vcf 1 - #end if - - 2> stderr.txt - > $snv_output ; - cat stderr.txt - </command> + <command detect_errors="exit_code"><![CDATA[ + varscan mpileup2snp + + '${mpileup_input}' + + #if $extended_parameters.parameters == "extended" + --min-coverage $extended_parameters.varscan_min_coverage + --min-reads2 $extended_parameters.varscan_min_reads2 + --min-avg-qual $extended_parameters.varscan_min_avg_qual + --min-var-freq $extended_parameters.varscan_min_var_freq + --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom + --p-value $extended_parameters.varscan_p_value + $extended_parameters.varscan_strand_filter + $extended_parameters.varscan_variants + #end if + + #if $varscan_output == "vcf" or $varscan_output.value == "vcf" + --output-vcf 1 + #end if + + > '${snv_output}' + ]]></command> <inputs> <param format="pileup" name="mpileup_input" type="data" label="Alignment file" help="Mapped reads in mpileup format."/><!-- datatype "mpileup" does not exist.. it seems to be common to use pileup instead? --> @@ -78,10 +75,27 @@ <output name="snv_output" file="hg19_mutant.vcf" /> </test> + <test> + <param name="mpileup_input" value="hg19_mutant.mpileup" dbkey="hg19" ftype="pileup" /> + <param name="parameters" value="extended" /> + + <param name="varscan_min_coverage" value="8" /> + <param name="varscan_min_reads2" value="2" /> + <param name="varscan_min_avg_qual" value="15" /> + <param name="varscan_min_var_freq" value="0.01" /> + <param name="varscan_min_freq_for_hom" value="0.75" /> + <param name="varscan_p_value" value="0.99" /> + <param name="varscan_strand_filter" value=" --strand_filter 1" /> + <param name="varscan_variants" value=" --variants 0" /> + + <param name="varscan_output_vcf" value="1" /> + + <output name="snv_output" file="hg19_mutant.2.vcf" /> + </test> </tests> <help> -**VarScan 2.3.6** +**VarScan 2.4.2** VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. http://dx.doi.org/10.1101/gr.129684.111 @@ -92,42 +106,14 @@ Other people were aware of this and have written a version that can do parallelization: https://github.com/mydatascience/parallel-mpileup -Consequently, when a BAM files gets processed by this wrapper, it's processed by *parallel-mpileup* before its send to VarScan. - .. _VarScan: http://varscan.sourceforge.net/ **Input formats** VarScan2 accepts sequencing alignments in the same, either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. -**Installation** - -Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment. - -**License** - -* VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0) -* parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING) - -Contact -------- - -The tool wrapper has been written by Youri Hoogstrate from the Erasmus -Medical Center (Rotterdam, Netherlands) on behalf of the Translational -Research IT (TraIT) project: - -http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch - -More tools by the Translational Research IT (TraIT) project can be found -in the following toolsheds: - -http://toolshed.dtls.nl/ - -http://toolshed.g2.bx.psu.edu/ - -http://testtoolshed.g2.bx.psu.edu/ </help> <citations> <citation type="doi">10.1101/gr.129684.111</citation> </citations> -</tool> \ No newline at end of file +</tool> |
b |
diff -r 0c5cc5763091 -r 9a39c4105901 varscan_mpileup2snp_from_bam.xml --- a/varscan_mpileup2snp_from_bam.xml Thu Nov 05 09:59:46 2015 -0500 +++ b/varscan_mpileup2snp_from_bam.xml Wed Feb 15 16:16:01 2017 -0500 |
[ |
b'@@ -1,102 +1,75 @@\n <?xml version="1.0" encoding="UTF-8"?>\n-<tool id="varscan_mpileup2snp_from_bam" name="VarScan2 Call SNPs from BAM" version="2.3.6.a">\n+<tool id="varscan_mpileup2snp_from_bam" name="VarScan2 Call SNPs from BAM" version="2.4.2.a">\n <description>VarScan2 SNP/SNV detection; directly reading *.bam file(s) & using parallel mpileup generation, to avoid unnecessairy I/O overhead and increase performance.</description>\n \n <requirements>\n- <requirement type="package" version="0.1.19-a">samtools_parallel_mpileup</requirement>\n- <requirement type="package" version="0.1.19">samtools</requirement>\n- <requirement type="package" version="2.3.6">varscan</requirement>\n+ <requirement type="package" version="2.4.2">varscan</requirement>\n+ <requirement type="package" version="0.6.5">sambamba</requirement>\n </requirements>\n \n- <version_command>java -jar $JAVA_JAR_PATH/VarScan.v2.3.6.jar 2>&1 | head -n 1</version_command>\n+ <version_command>varscan 2>&1 | head -n 1</version_command>\n \n- <command>\n- #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1\n- echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that \'tool-data/all_fasta.loc\' is configured properly!" >&2\n- #else\n- #import os.path\n+ <command detect_errors="exit_code"><![CDATA[\n+ #for $alignment in $alignments\n+ ln -f -s \'${alignment.metadata.bam_index}\' \'${alignment}.bai\' &&\n+ #end for\n+ \n+ sambamba mpileup\n+ -t \\${GALAXY_SLOTS:-4}\n+ \n #for $alignment in $alignments\n- <!-- @todo use the existence of $alignment.metadata.bam_index or $alignment.metadata[\'bam_index\'] -->\n- #if not os.path.isfile(str($alignment)+".bai")\n- echo "- Indexing alignment file: $alignment.name " ; \n- samtools index $alignment 2>&1 ; \n- #else\n- echo "- Skiping indexing: $alignment.name " ; \n- #end if\n+ \'${alignment}\'\n #end for\n \n- #if $mpileup_parallelization.mpileup_parallelization_select == "true"\n- samtools-parallel-mpileup mpileup\n- -t $mpileup_parallelization.samtools_threads\n- #else\n- samtools mpileup\n- #end if\n- -f \n- #if $reference_genome_source.source_select == "indexed_filtered"\n- "$reference_genome_source.reference_genome"\n- #else if $reference_genome_source.source_select == "indexed_all"\n- "$reference_genome_source.reference_genome"\n- #else if $reference_genome_source.source_select == "history"\n- "$reference_genome_source.reference_genome"\n- #else\n- <!--\n- This is a workaround to obtain the "genome.fa" file that\n- corresponds to the dbkey of the alignments.\n- Because this file is "calculated" during run-time, it can\n- be used in a workflow.\n- -->\n- "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ \'all_fasta\' ].get_fields() )[0][-1] }"\n- #end if\n- \n- #if $extended_parameters_regions.samtools_regions == "region"\n- -r $extended_parameters_regions.samtools_r\n- #elif $extended_parameters_regions.samtools_regions == "regions'..b'ambamba! -->\n- <param name="alignments" value="example.bam" ftype="bam" />\n- \n- <param name="source_select" value="history" />\n- <param name="reference_genome" value="example.fa" ftypet="fasta" />\n- \n- <param name="samtools_regions" value="entire_genome" />\n- \n- <param name="mpileup_parallelization_select" value="true" />\n- <param name="samtools_threads" value="2" />\n- <param name="sort_mpileup" value="true" />\n+ <param name="sambamba_regions" value="entire_genome" />\n \n <param name="parameters" value="default" />\n <param name="varscan_output_vcf" value="1" />\n@@ -286,53 +215,29 @@\n </tests>\n \n <help>\n-**VarScan 2.3.6**\n+**VarScan 2.4.2**\n \n-VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.\n+VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments.\n http://dx.doi.org/10.1101/gr.129684.111\n http://www.ncbi.nlm.nih.gov/pubmed/19542151\n \n-*VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing it is bypassed. Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into a mpileup file and get directly linked to VarScan.\n-The samtools package is not able to parallelize the mpileup generation which make it a very slow process.\n-Other people were aware of this and have written a version that can do parallelization:\n-https://github.com/mydatascience/parallel-mpileup\n-\n-Consequently, when a BAM files gets processed by this wrapper, it\'s processed by *parallel-mpileup* before its send to VarScan.\n+*VarScan* requires mpileup input files, generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing can be by-passed using this tool.\n+Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into a mpileup file and get directly linked to VarScan.\n \n .. _VarScan: http://varscan.sourceforge.net/\n \n **Input formats**\n \n-VarScan2 accepts sequencing alignments in the same, either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.\n-\n-**Installation**\n-\n-Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment.\n-\n-**License**\n-\n-* VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0)\n-* parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)\n+VarScan2 accepts sequencing alignments in the same, either SAM or BAM format (http://samtools.sourceforge.net/).\n+The alignment files must have a reference genome (dbkey) in Galaxy.\n \n Contact\n -------\n \n The tool wrapper has been written by Youri Hoogstrate from the Erasmus\n-Medical Center (Rotterdam, Netherlands) on behalf of the Translational\n-Research IT (TraIT) project:\n-\n-http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch\n-\n-More tools by the Translational Research IT (TraIT) project can be found\n-in the following toolsheds:\n-\n-http://toolshed.dtls.nl/\n-\n-http://toolshed.g2.bx.psu.edu/\n-\n-http://testtoolshed.g2.bx.psu.edu/\n+Medical Center (Rotterdam, Netherlands) \n </help>\n <citations>\n <citation type="doi">10.1101/gr.129684.111</citation>\n </citations>\n-</tool>\n\\ No newline at end of file\n+</tool>\n' |