changeset 10:a7e7f9b8f538 draft

Uploaded
author geert-vandeweyer
date Sat, 27 Sep 2014 05:36:34 -0400
parents 7cbbf8aaa46c
children 845a87ad254a
files DC_Genotyper.pl DC_Genotyper.xml DC_Genotyper_indexes.loc.sample dbsnp_indexes.loc.sample readme.rst tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 7 files changed, 8 insertions(+), 667 deletions(-) [+]
line wrap: on
line diff
--- a/DC_Genotyper.pl	Sat Sep 27 02:47:31 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,530 +0,0 @@
-#!/usr/bin/perl
-#############################
-## DEEP COVERAGE GENOTYPER ##
-#############################
-# version : 0.0.1
-# Principle: 
-#  1. get allele counts on all positions in specified targets (bed) using igvtools. Only SNPs !!
-#  2. remove known dbsnp positions (bcf file)
-#  3. Get distribution of background noise (pcr/sequencing errors), by modelling allele fractions as normal distributions.
-#  4. Based on these distributions, check each position for significant change from the reference allele (based on allele fraction)
-#  5. For abberant positions, check each alternate allele to see if it passes the background signal. 
-#  6. Generate VCF file. 
-
-
-##################
-## LOAD MODULES ##
-##################
-use threads;
-use threads::shared;
-use Thread::Queue;
-use Getopt::Std;
-
-####################
-## get paramaters ##
-####################
-# t: target file
-# b: bam file
-# R: 2bit version of reference fasta.
-# p: number of threads.
-# s: dbsnp file
-# m: minimal coverage (defaults 400x)
-# P: ploidy
-# a: outfile for allele distributions
-# v: vcf file output.
-getopts('t:b:R:p:s:m:P:v:a:', \%opts) ;
-
-## variables
-my $twobit :shared;
-my $igvgenome :shared;
-if (!defined($opts{'R'})) {
-	die("Reference Genomes not specified\n");
-}
-my @refgenomes = split(",",$opts{'r'});
-if (!-e $refgenomes[0]) {
-	die("'$refgenomes[0]' is not a valid file path.");
-}
-else {
-	$twobit = $refgenomes[0];
-}
-if (!-e $refgenomes[1]) {
-	die("'$refgenomes[1]' is not a valid file path.");
-}
-else {
-	$igvgenome = $refgenomes[1];
-}
-
-
-my $mincov :shared;
-$mincov = 320;
-if (defined($opts{'m'})) {
-	$mincov = $opts{'m'};
-}
-
-my $ploidy :shared;
-if (defined($opts{'P'}) && $opts{'P'} =~ m/^\d+$/) {
-	$ploidy = $opts{'P'};
-}
-else {
-	die("Ploidy (-P) was not specified or not an integer\n");
-}
-
-
-if (defined($opts{'v'})) {
-	$outfile = $opts{'v'};
-}
-else {
-	die("No output vcf-file specified.\n");
-}
-if (!defined($opts{'a'})) {
-	die("No output file specified for distribution details\n");
-}
-## create working dir.
-my $rand = int(rand(10000));
-while (-d "/tmp/DC_Genotyper_$rand") {
-	$rand = int(rand(10000));
-}
-my $wd :shared;
-$wd = "/tmp/DC_Genotyper_$rand";
-system("mkdir '$wd'");
-
-
-my $snpfile :shared;
-my $hassnp :shared;
-$hassnp = 'NoDbSNP';
-$snpfile = '';
-if (defined($opts{'s'})) {
-	$snpfile = $opts{'s'};
-	if (!-e $snpfile) {
-		die("'$snpfile' is not a valid file path.");
-	}
-
-	my $mime = `file $snpfile`;
-	if ($mime !~ m/compressed/) {
-		print "$snpfile is not in compressed format. compressing & indexing the file now.\n";
-		#print "... this takes a while\n";
-		system("bgzip -c $snpfile > $wd/dbSNP.vcf.bgz");
-		system("cd $wd/ && tabix -p vcf dbSNP.vcf.bgz");
-		$snpfile = "$wd/dbSNP.vcf.bgz";
-	}
-	elsif (!-e "$snpfile.tbi") {
-		print "tabix index file is missing for '$snpfile'. creating now.\n";
-		## check if I can write it out for future use
-		$snpfile =~ m/(.*)([^\/]+)$/;
-		my $d = $1;
-		if (-w $d) {
-			open OUT, ">$d/lock";
-			flock(OUT,2);
-			system("cd $d && tabix -p vcf $snpfile");
-			close OUT;
-			system("rm $d/lock");
-		}
-		else {
-			system("cp $snpfile /$wd/dbSNP.vcf.bgz");
-			system("cd $wd/ && tabix -p vcf dbSNP.vcf.bgz");
-			$snpfile = "$wd/dbSNP.vcf.bgz";
-		}
-	}
-	$hassnp = 'WithDbSNP';
-}
-
-	
-## 1. Get FASTA and prepare output hashes: 
-my $targets_one = Thread::Queue->new();
-my $targets_two = Thread::Queue->new();
-my $targets_three = Thread::Queue->new();
-open IN, $opts{'t'} or die("Could not open $opts{'t'} file for reading");
-if (-d "$wd/Fasta/") {
-	system("rm $wd/Fasta/*");
-}
-else {
-	system("mkdir $wd/Fasta");
-}
-## create the threads.
-for (my $i = 1; $i<= $opts{'p'}; $i++) {
-	${'thr'.$i} = threads->create('FetchFasta');
-}
-
-## enqueue the targets.
-my %thash;
-while (<IN>) {
-	chomp;
-	my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$_);
-	$targets_one->enqueue($_);
-	$targets_two->enqueue($_);
-	$targets_three->enqueue($_);
-	$thash{$chr}{$start} = $stop;
-}
-close IN;
-
-## end the threads.
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	$targets_one->enqueue(undef);
-}
-
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i}->join();
-}
-
-## load dbSNP inside target regions into shared structure.
-##########################################################
-my %dbsnp :shared;
-if ($snpfile ne '') {
-	my $bcf = `which bcftools`;
-	chomp($bcf);
-	if ($bcf ne '') {
-		my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt";
-		system("$command");
-		open IN, "$wd/dbsnp.txt";
-		while (<IN>) {
-			chomp;
-			my @p = split(/\t/,$_);
-			$dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4];
-		}
-		close IN;
-	}
-	else {
-		print "WARNING: BCFtools is not in the path. Skipping snp handling.\n";
-		$snpfile = '';
-		system("touch $wd/dbsnp.txt");
-	}
-}
-else {
-	system("touch $wd/dbsnp.txt");
-}
-
-## now process the bam file.
-mkdir "$wd/WIGS/";
-my $bam :shared;
-$bam = $opts{'b'};
-my $bai = $bam.".bai";
-if (!-e $bai) {
-	print "BAI ($bai) missing for $bam : creating\n";
-	system("samtools index $bam"); 
-} 
-
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i} = threads->create('CountAlleles');
-}
-## end the threads.
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	$targets_two->enqueue(undef);
-}
-
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i}->join();
-}
-
-## generate the distributions.
-##############################
-my $alleles = Thread::Queue->new();
-my %all = ('A' => 1,'C' => 2,'G' => 3, 'T' => 4);
-foreach(keys(%all)) {
-	$alleles->enqueue($_);
-	my  $a = $_;
-	foreach(keys(%all)) {
-		if ($_ eq $a) {
-			next;
-		}
-		$alleles->enqueue($a.'-'.$_);
-	}
-}
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i} = threads->create('GetDistribution');
-}
-## end the threads.
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	$alleles->enqueue(undef);
-}
-
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i}->join();
-}
-
-## group distributions into one file
-####################################
-my %map =('A' => 2,'C' => 3,'G' => 4, 'T' => 5);
-open OUT, ">".$opts{'a'};
-print OUT "allele\tavg\tsd\n";
-foreach(keys(%map)) {
-	my $r = $_;
-	my $f = "$wd/model.$r.$mincov"."x.$hassnp.txt";
-	open IN, "$f";
-	my $a = <IN>;
-	chomp($a);
-	#$dists{$r}{'avg'} = $a;
-	my $s = <IN>;
-	chomp($s);
-	#$dists{$r}{'sd'} = $s;
-	close IN;
-	print OUT "$r\t$a\t$s\n";
-	foreach(keys(%map)) {
-		if ($_ eq $r) {
-			next;
-		}
-		my $f = "$wd/model.$r-$_.$mincov"."x.$hassnp.txt";
-		open IN, "$f";
-		my $a = <IN>;
-		chomp($a);
-		my $s = <IN>;
-		chomp($s);
-		close IN;
-		print OUT "$r-$_\t$a\t$s\n";
-	}
-}
-close OUT;
-
-## CALL SNPs
-############
-# create the R script.
-open R, ">$wd/CallSNPs.R";
-print R "args <- commandArgs(trailingOnly = TRUE)\n";
-print R "counts <- read.table(file=args[1],header=FALSE, as.is=TRUE)\n";
-print R "ploidy <- as.integer(args[3])\n";
-print R "chr <- args[2]\n";
-print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n";
-print R "colnames(snps) <- c('chr','pos','ref','alt','id')\n";
-print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n";
-print R "dists <- read.table(file='$wd/allelic_distributions.txt',header=TRUE,as.is=TRUE)\n";
-print R 'rownames(dists) = dists$allele'."\n";
-print R 'dists <- dists[,-1]'."\n";
-print R "vcf <- c()\n";
-print R "lower <- c()\n";
-print R "higher <- c()\n";
-print R "for (i in 1:(ploidy)) {\n";
-print R "  lower[length(lower)+1] <- (2*i-1)/(2*ploidy)\n";
-print R "  higher[length(higher)+1] <- (2*i+1)/(2*ploidy)\n";
-print R "}\n";
-print R "for (i in 1:nrow(counts)) {\n";
-print R "  if (counts[i,'TotalDepth'] == 0) next\n";
-print R "  # significantly different from reference?\n";
-print R "  z <- ((counts[i,counts[i,'ref']]/counts[i,'TotalDepth']) - dists[counts[i,'ref'],'avg']) /  dists[counts[i,'ref'],'sd']\n";
-print R "  if (abs(z) > 3) {\n";
-print R "    # test all alterate alleles to see which one is significant.\n";
-print R "    for (j in c('A','C','G','T')) {\n";
-print R "      if (j == counts[i,'ref']) next\n";
-print R "      z <- ((counts[i,j]/counts[i,'TotalDepth']) - dists[paste(counts[i,'ref'],'-',j,sep=''),'avg']) /  dists[paste(counts[i,'ref'],'-',j,sep=''),'sd']\n";
-print R "      if (abs(z) > 3){\n";
-print R "         filter <- 'PASS'\n";
-print R "         phred <- round(-10*log(pnorm(-abs(z))),digits=0)\n";
-print R "         if (phred > 9999) phred <- 9999\n";
-print R "         frac <- counts[i,j]/counts[i,'TotalDepth']\n";
-print R "         for (k in 1:ploidy) {\n";
-print R "            if (frac >= lower[k] && frac < higher[k]) {\n";
-print R "		sample <- paste(paste(paste(rep(0,(ploidy-k)),sep='',collapse='/'),paste(rep(1,k),sep='',collapse='/'),sep='/',collapse=''),':',counts[i,counts[i,'ref']],',',counts[i,j],sep='',collapse='')\n";
-print R "                af <- k/ploidy\n";
-print R "                break\n";
-print R "            }\n";
-print R "         }\n";
-print R "         if (frac < lower[1]) {\n";
-print R "            sample <- paste(paste(paste(rep(0,(ploidy-1)),sep='',collapse='/'),paste(rep(1,1),sep='',collapse='/'),sep='/',collapse=''),':',counts[i,counts[i,'ref']],',',counts[i,j],sep='',collapse='')\n";
-print R "            af <- 1/ploidy\n";
-print R "            filter <- 'LowFraction'\n";
-print R "         }\n";
-print R "         if (counts[i,'TotalDepth'] < $mincov) {\n";
-print R "            filter <- 'LowCoverage'\n";
-print R "         }\n";
-print R "         info <- paste('DP=',counts[i,'TotalDepth'],';AF=',round(af,digits=5),';AR=',round(frac,digits=5),sep='')\n";
-print R "	  snpids <- which(snps\$chr == chr & snps\$pos == counts[i,'pos'])\n";
-print R "         id <- '.'\n";
-print R "         if (length(snpids) > 0) id <- snps[snpids[1],'id']\n";
-print R "	  vcf[length(vcf)+1] <- paste(chr,counts[i,'pos'],id,counts[i,'ref'],j,phred,filter,info,'GT:AD',sample,sep='\\t',collapse='')\n";
-print R "      }\n";
-print R "     }\n";
-print R "  }\n";
-print R "}\n";
-print R "if (length(vcf) > 0) {\n";
-print R "   write(file=args[4],paste(vcf,sep='\\n'))\n";
-print R "}\n";
-close R;
-system("mkdir $wd/VCF/");
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i} = threads->create('CallSNPs');
-}
-## end the threads.
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	$targets_three->enqueue(undef);
-}
-
-for (my $i = 1; $i <= $opts{'p'}; $i++) {
-	${'thr'.$i}->join();
-}
-
-## BUILD FINAL VCF
-open OUT, ">$outfile";
-print OUT "##fileformat=VCFv4.1\n";
-print OUT "##source=High_Ploidy_Genotyper_v.0.1\n";
-print OUT "##genome_reference=$twobit\n";
-if ($snpfile ne '') {
-	print OUT "##SNP_file=$snpfile\n";
-}
-foreach(keys(%thash)) {
-	print OUT "##contig=<ID=$_,assembly=hg19,species=\"Homo Sapiens\">\n";
-}
-print OUT "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">\n";
-print OUT "##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n";
-print OUT "##INFO=<ID=AR,Number=1,Type=Float,Description=\"Allelic Ratio\">\n";
-print OUT "##FILTER=<ID=LowFraction,Description=\"Allelic Fraction under 1/2*$ploidy\">\n";
-print OUT "##FILTER=<ID=LowCoverage,Description=\"Total Depth is lower than threshold of $mincov\">\n";
-print OUT "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
-print OUT "##FORMAT=<ID=AD,Number=2,type=Integer,Description,\"Allelic Depth\">\n";
-print OUT "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE\n";
-close OUT;
-@i = ( 1 .. 22,'X','Y','M' );
-foreach(@i) {
-	my $chr = "chr$_";
-	foreach(sort {$a <=> $b} keys(%{$thash{$chr}})) {
-		my $v = "$wd/VCF/$chr.$_-".$thash{$chr}{$_}.".vcf";
-		if (-e $v) {
-			system("cat '$v' >> '$outfile'");
-		}
-	}
-}
-
-## clean up
-system("rm -Rf '$wd'");
-
-sub FetchFasta  {
-	while(defined(my $line = $targets_one->dequeue())) {
-		my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$line);
-		# 2bit is zero based, non-including => decrease start by one
-		$startposition = $start - 1;
-		my $command = "twoBitToFa -seq=$chr -start=$startposition -end=$stop -noMask $twobit $wd/Fasta/$chr-$start-$stop.fasta";
-		system($command);
-	}
-}
-
-sub CountAlleles {
-	# local version of hashes
-	my $snp = \%dbsnp;
-	my %counts;
-	$counts{'A'} = '';
-	$counts{'C'} = '';
-	$counts{'G'} = '';
-	$counts{'T'} = '';
-	my %map =('A' => 1,'C' => 2,'G' => 3, 'T' => 4);
-	my %options;
-	foreach(keys(%map)) {
-		my $r = $_;
-		foreach(keys(%map)) {
-			if ($_ eq $r) {
-				next;
-			}
-			$options{$r.'-'.$_} = '';
-		}
-	}
-	while (defined(my $line = $targets_two->dequeue())) {
-		$out = '';
-		my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$line);
-		## get reference alleles
-		my %ref_alleles;
-		open FASTA, "$wd/Fasta/$chr-$start-$stop.fasta";
-		my $head = <FASTA>;
-		my $seq = '';
-		while (<FASTA>) { 
-			chomp;
-			$seq .= $_;
-		}
-		close FASTA;
-		# this generates a hash of the reference alleles once, instead of substr-calls in every bam, on every iteration.
-		for (my $pos = 0; $pos < length($seq); $pos++) {
-			$ref_alleles{($pos+$start)} = substr($seq,$pos,1);
-		}
-		## get counts.
-		my $target = "$chr:$start-$stop";
-		my $command = "igvtools count -w 1 --bases --query '$target' '$bam' '$wd/WIGS/$chr-$start-$stop.wig' '$igvgenome' > /dev/null 2>&1";
-		system($command);
-		open WIG, "$wd/WIGS/$chr-$start-$stop.wig";
-		my $h = <WIG>;
-		$h = <WIG>;
-		$h = <WIG>;
-		my $target_counts = '';
-		while (<WIG>) {	
-			chomp;
-			#my ($pos, $a, $c, $g, $t , $n) = split(/\t/,$_);
-			my @p = split(/\t/,$_);
-			my $s = $p[1] + $p[2] + $p[3] + $p[4];
-			$target_counts .= "$p[0]\t$ref_alleles{$p[0]}\t$p[1]\t$p[2]\t$p[3]\t$p[4]\t$s\n";
-			## skip positions with coverage < minimal coverage, and positions in dbsnp if specified (if not specified, snp hash is empty).
-			if ($s > $mincov && !defined($snp->{$chr.'-'.$p[0]})) {
-				## for model of 'non-reference' 
-				my $frac = $p[$map{$ref_alleles{$p[0]}}] / $s;
-				$counts{$ref_alleles{$p[0]}} .= $frac.',';
-				$out .= "$target\t$p[0]\t$ref_alleles{$p[0]}\t$p[1]\t$p[2]\t$p[3]\t$p[4]\n";
-				## for each of the options background models
-				foreach(keys(%map)) {
-					if ($_ eq $ref_alleles{$p[0]}) {
-						next;
-					}
-					$options{$ref_alleles{$p[0]}.'-'.$_} .= ($p[$map{$_}] / $s) .',';
-				}
-					 
-			}
-		}
-		close WIG;
-		open OUT, ">>$wd/allcounts.$mincov"."x.$hassnp.txt";
-		flock(OUT, 2);
-		print OUT $out;
-		close OUT;
-		open OUT, ">$wd/WIGS/$chr.$start-$stop.txt";
-		print OUT $target_counts;
-		close OUT;
-			
-	}
-	foreach(keys(%counts)) {
-		open OUT, ">>$wd/counts_$_.$mincov"."x.$hassnp.txt";
-		flock(OUT,2);
-		print OUT $counts{$_};
-		close OUT;
-	}
-	foreach(keys(%options)) {
-		open OUT, ">>$wd/counts_$_.$mincov"."x.$hassnp.txt";
-		flock(OUT,2);
-		print OUT $options{$_};
-		close OUT;
-	}
-}
-
-sub GetDistribution {
-	while (defined(my $allele = $alleles->dequeue())) {
-		system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'");
-		open OUT, ">$wd/GetDistribution.$allele.R";
-		print OUT "sample <- '$bam'\n";
-		print OUT "nt <- '$allele'\n";
-		#print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n";
-		print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n";
-		print OUT "nr <- length(data)\n";
-		print OUT "avg <- mean(data)\n";
-		print OUT "sdd <- sd(data)\n";
-		#print OUT "if (avg > 0.5) {\n";
-		#print OUT "  x <- seq(0.8,1,length=1000)\n";
-		#print OUT "  y <- dnorm(x,mean=avg,sd=sdd)\n";
-		#print OUT "  plot(x,y,main='Distribution in sample $bam for nt $allele',xlab='Allelic Ratio',type='l',lwd=1)\n";
-		#print OUT "  abline(v=(avg-3*sdd),col='red')\n";
-		#print OUT "  text(0.81,max(y-0.5),paste(c('avg: ',avg,'\\nsd: ',sdd,'\\nnrDataPoints:', nr,'\\n$hassnp\\nMin.Cov:',$mincov),sep=' ',collapse=''),adj=c(0,1))\n";
-		#print OUT "} else {\n";
-		#print OUT "  x <- seq(0,0.3,length=1000)\n";
-		#print OUT "  y <- dnorm(x,mean=avg,sd=sdd)\n";
-		#print OUT "  plot(x,y,main='Distribution in sample $bam for nt $allele',xlab='Allelic Ratio',type='l',lwd=1)\n";
-		#print OUT "  abline(v=(avg+3*sdd),col='red')\n";
-		#print OUT "  text(0.2,max(y-0.5),paste(c('avg: ',avg,'\\nsd: ',sdd,'\\nnrDataPoints:', nr,'\\n$hassnp\\nMin.Cov:',$mincov),sep=' ',collapse=''),adj=c(0,1))\n";
-		#print OUT "}\n";
-		#print OUT "dev.off()\n";
-		print OUT "write(c(avg,sdd),file='$wd/model.$allele.$mincov"."x.$hassnp.txt',ncolumns=1)\n";
-		close OUT; 
-		system("cd $wd && Rscript GetDistribution.$allele.R >/dev/null 2>&1");
-	}
-}
-
-
-sub CallSNPs {
-	while (defined(my $line = $targets_three->dequeue())) {
-		# split.
-		my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$line);	
-		my $file = "$wd/WIGS/$chr.$start-$stop.txt";
-		my $ofile = "$wd/VCF/$chr.$start-$stop.vcf";
-		system("cd $wd && Rscript CallSNPs.R '$file' '$chr' '$ploidy' '$ofile' '$wd/dbsnp.txt'");
-	}
-
-}
-
--- a/DC_Genotyper.xml	Sat Sep 27 02:47:31 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-<tool id="DC_Genotyper" name="DC Genotyper" version='0.0.1'>
-	<description></description>
-	<requirements>
-		<requirement type='package' version='3.0.2'>R_3_0_2</requirement>
-		<requirement type='package' version='0.1.18'>samtools</requirement>
-		<requirement type='package' version='0.2.6'>tabix</requirement>
-		<requirement type='package' version='latest'>blat_server</requirement>
-		<requirement type='package' version='1.92'>perl_module_threads</requirement>
-		<requirement type='package' version='1.46'>perl_module_threads_shared</requirement>
-		<requirement type='package' version='3.02'>perl_module_Thread_Queue</requirement>
-		<requirement type='package' version='2.3.32'>igvtools</requirement>
-        </requirements>
-	<command interpreter="perl">DC_Genotyper.pl 
-		-t "$targets" 
-		-b "$bamfile" 
-		-R "${ref.fields.path}" 
-		-p  "\${GALAXY_SLOTS:-4}"
-		#if $dbsnp.source == "history":
-			-s "${dbsnp.ownFile}"
-		#else 
-			-s "${dbsnp.indices.fields.path}"
-		#end if 
-		-m $mincov
-		-P $ploidy
-		
-		-a $output1 
-		-v $output2 
-	</command>	
-
-	<inputs>
-		<param  name="bamfile" type="data" format="bam" label="Sample BAM file" />
-		<param  name="targets" type="data" format="bed" label="Enrichment BED file" /> 
-		<param name="ref" type="select" label="Select a reference genome">
-          		<options from_data_table="DC_Genotyper_indexes">
-            		  <filter type="sort_by" column="2" />
-            		    <validator type="no_options" message="No indexes are available" />
-          		</options>
-        	</param>
-		<conditional name="dbsnp">
-		  <param name="source" type="select" label="Will you select a dbSNP file from your history, or use a built in version (which is faster)">
-			<option value="indexed">Use a built-in version</option>
-			<option value="history">Use one from the history</option>
-		  </param>
-		  <when value="indexed">
-			 <param name="indices" type="select" label="Select a dbSNP version">
-          		 	<options from_data_table="dbsnp_indexes">
-                         		<filter type="sort_by" column="2" />
-            				<validator type="no_options" message="No indexes are available" />
-          			</options>
-        		</param>
-		  </when>
-                  <when value="history">
-			<param name="ownFile" type="data" format="vcf,bcf" label="Select a dbSNP file from history"/>
-		  </when>
-          	</conditional>
-		<param name="mincov" value="400" type="integer" label="Minimal Coverage Depth" />
-		<param name="ploidy" type="integer" value='10' label="Expected Sample Ploidy" />
-	</inputs>
-	
-	<outputs>
-		<data format='txt' name="output1" label="${tool.name} on ${on_string}: Allele Fraction Distributions"/>
-		<data format='vcf' name='output2' label="${tool.name} on ${on_string}: VCF file" />
-	</outputs>
-<help>
-
-**What it does**
-
-	1. get allele counts on all positions in specified targets (bed) using igvtools. Only SNPs !!
-	2. remove known dbsnp positions (bcf file)
-	3. Get distribution of background noise (pcr/sequencing errors), by modelling allele fractions as normal distributions.
-	4. Based on these distributions, check each position for significant change from the reference allele (based on allele fraction)
-	5. For abberant positions, check each alternate allele to see if it passes the background signal. 
-	6. Generate VCF file. 
-
-
-**Information**
-
-This tools is created by Geert Vandeweyer. It is a very early version with several limitations. Current limitations are : no support for indels, no plotting of the noise-models, incorrect syntax in for multi-allelic sites in the VCF file.
-
-Any feedback is welcome. 
-
-</help>
-</tool>
--- a/DC_Genotyper_indexes.loc.sample	Sat Sep 27 02:47:31 2014 -0400
+++ b/DC_Genotyper_indexes.loc.sample	Sat Sep 27 05:36:34 2014 -0400
@@ -5,11 +5,11 @@
 #the directories in which those files are stored. The DC_Genotyper.loc
 #file has this format (white space characters are TAB characters):
 #
-#<unique_build_id>	<display_name>		<2bit_path,IGVtools_genome.path>
+#<unique_build_id>	<dbkey>	<display_name>		<2bit_path,IGVtools_genome.path>
 #
 #for example:
 #
-#hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/twobit/hg19.2bit,/depot/data2/galaxy/igvtools/hg19.chrom.sizes
+#hg19	hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/twobit/hg19.2bit,/depot/data2/galaxy/igvtools/hg19.chrom.sizes
 #
 #then your /depot/data2/galaxy/twobit/ directory
 #would need to contain the following 2bit files:
--- a/dbsnp_indexes.loc.sample	Sat Sep 27 02:47:31 2014 -0400
+++ b/dbsnp_indexes.loc.sample	Sat Sep 27 05:36:34 2014 -0400
@@ -5,13 +5,13 @@
 #the directories in which those files are stored. The dbsnp_indexes.loc
 #file has this format (white space characters are TAB characters):
 #
-#<unique_build_id>	<display_name>		</path/to/dbSNP.vcf.bgz>
+#<unique_build_id>	<dbkey>	<display_name>		</path/to/dbSNP.vcf.bgz>
 #
 #for example:
 #
-#hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/dbsnp/dbsnp_137.hg19.vcf.bgz
-#hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/dbsnp/dbsnp_138.hg19.vcf.bgz
-#mm9	Mouse (Mus musculus): mm9		/depot/data2/galaxy/dbsnp/dbsnp_128.mm9.vcf.bgz
+#hg19	hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/dbsnp/dbsnp_137.hg19.vcf.bgz
+#hg19	hg19	Human (Homo sapiens): hg19		/depot/data2/galaxy/dbsnp/dbsnp_138.hg19.vcf.bgz
+#mm9	mm9	Mouse (Mus musculus): mm9		/depot/data2/galaxy/dbsnp/dbsnp_128.mm9.vcf.bgz
 #
 #then your /depot/data2/galaxy/dbsnp/ directory
 #would need to contain the following files:
--- a/readme.rst	Sat Sep 27 02:47:31 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-BACKGROUND:
-
-DC_Genotyper stands for Deep-Coverage Genotyper, and is aimed at detecting low fraction SNPs (no indels) in high-ploidy (or pooled) samples with very high coverage. It is being developed at the University of Antwerp by Geert Vandeweyer. 
-
-METHOD:
-
-DC_Genotyper generates a background noise distributions on a per-sample basis, and uses these distrubutions to detect non-reference sites. For non-reference sites, Allele-specific distributions are used to estimate if an allele surpasses the background signal (e.g. A_to_G has different distribution as A_to_C, also reflected in Tr/Tv ratios). 
-
-LIMITATION:
-
-This is a very early version with several limitations. Current limitations are : no support for indels, no plotting of the noise-models, incorrect syntax in for multi-allelic sites in the VCF file.
-
-Any feedback is welcome
-
-
-INSTALLATION:
-
-After installation, complete the dbsnp.loc and dc_genotyper_indexes.loc files. The DC_genotyper_indexes.loc file has a specific format, specifying two index files per genome in a single line. Make sure you use comma to seperate these. DCG supports multithreading, but keep tests have shown that using more than 6-8 threads will lead to I/O bottlenecks.  
--- a/tool_data_table_conf.xml.sample	Sat Sep 27 02:47:31 2014 -0400
+++ b/tool_data_table_conf.xml.sample	Sat Sep 27 05:36:34 2014 -0400
@@ -2,11 +2,11 @@
 <tables>
     <!-- Locations of all index files under genome directory -->
     <table name="DC_Genotyper_indexes" comment_char="#">
-        <columns>name,value, path</columns>
+        <columns>value, dbkey, name, path</columns>
         <file path="DC_Genotyper_indexes.loc" />
     </table>
     <table name="dbsnp_indexes" comment_char="#">
-        <columns>name,value, path</columns>
+        <columns>value, dbkey, name, path</columns>
         <file path="dbsnp_indexes.loc" />
     </table>
 
--- a/tool_dependencies.xml	Sat Sep 27 02:47:31 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="R_3_0_2" version="3.0.2">
-        <repository changeset_revision="50f7e1e71271" name="package_r_3_0_2" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-   <package name="samtools" version="0.1.18">
-	<repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
-   </package>
-   <package name="perl_module_threads" version="1.92">
-        <repository changeset_revision="01ed0b079724" name="package_perl_threading" owner="geert-vandeweyer" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="perl_module_threads_shared" version="1.46">
-        <repository changeset_revision="01ed0b079724" name="package_perl_threading" owner="geert-vandeweyer" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="perl_module_Thread_Queue" version="3.02">
-        <repository changeset_revision="01ed0b079724" name="package_perl_threading" owner="geert-vandeweyer" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="tabix" version="0.2.6">
-	<repository changeset_revision="3d6beba7393e" name="package_tabix_0_2_6" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="blat_server" version="latest">
-	<repository changeset_revision="5920144e9e9b" name="package_blat_server" owner="jjohnson" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="igvtools" version="2.3.32">
-	<repository name="package_igvtools_2_3_32" changeset_revision="79f8f883a7a0" owner='geert-vandeweyer' toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>				
-