# HG changeset patch # User geert-vandeweyer # Date 1411970630 14400 # Node ID 7e4a9ee69f0b9d8bafcf36a3481500fb27cae11e # Parent 36cc147395adf99f82b51dcfbf739ed647800989 Uploaded diff -r 36cc147395ad -r 7e4a9ee69f0b DC_Genotyper.pl --- a/DC_Genotyper.pl Mon Sep 29 02:03:40 2014 -0400 +++ b/DC_Genotyper.pl Mon Sep 29 02:03:50 2014 -0400 @@ -170,27 +170,38 @@ ########################################################## my %dbsnp :shared; if ($snpfile ne '') { - my $bcf = `which bcftools`; - chomp($bcf); - if ($bcf ne '') { - my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt"; - system("$command"); - open IN, "$wd/dbsnp.txt"; - while () { - chomp; - my @p = split(/\t/,$_); - $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4]; - } - close IN; - } - else { - print "WARNING: BCFtools is not in the path. Skipping snp handling.\n"; - $snpfile = ''; - system("touch $wd/dbsnp.txt"); - } + #my $bcf = `which bcftools`; + #chomp($bcf); + #if ($bcf ne '') { + # my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt"; + # system("$command"); + # open IN, "$wd/dbsnp.txt"; + # while () { + # chomp; + # my @p = split(/\t/,$_); + # $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4]; + # } + # close IN; + #} + #else { + # print "WARNING: BCFtools is not in the path. Skipping snp handling.\n"; + # $snpfile = ''; + # system("touch $wd/dbsnp.txt"); + #} + system("tabix $snpfile -B $opts{'t'} | cut -f 1-5 > $wd/dbsnp.txt"); + my $lc = `cat $wd/dbsnp.txt | wc -l`; + chomp($lc); + if ($lc eq '0') { + open SNP, ">$wd/dbsnp.txt"; + ## dummy line on chr zero + print SNP "chr0\t1\t.\tA\tT\n"; + close SNP; } else { - system("touch $wd/dbsnp.txt"); + open SNP, ">$wd/dbsnp.txt"; + ## dummy line on chr zero + print SNP "chr0\t1\t.\tA\tT\n"; + close SNP; } ## now process the bam file. @@ -278,14 +289,15 @@ ############ # create the R script. open R, ">$wd/CallSNPs.R"; +print R "\n"; print R "args <- commandArgs(trailingOnly = TRUE)\n"; -print R "counts <- read.table(file=args[1],header=FALSE, as.is=TRUE)\n"; +print R "counts <- read.table(file = args[1],header = FALSE, as.is = TRUE)\n"; print R "ploidy <- as.integer(args[3])\n"; print R "chr <- args[2]\n"; print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n"; -print R "colnames(snps) <- c('chr','pos','ref','alt','id')\n"; +print R "colnames(snps) <- c('chr','pos','id','ref','alt')\n"; print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n"; -print R "dists <- read.table(file='$wd/allelic_distributions.txt',header=TRUE,as.is=TRUE)\n"; +print R "dists <- read.table(file='".$opts{'a'}."',header=TRUE,as.is=TRUE)\n"; print R 'rownames(dists) = dists$allele'."\n"; print R 'dists <- dists[,-1]'."\n"; print R "vcf <- c()\n"; @@ -489,7 +501,7 @@ while (defined(my $allele = $alleles->dequeue())) { system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'"); open OUT, ">$wd/GetDistribution.$allele.R"; - print OUT "sample <- '$bam'\n"; + print OUT "\n"; print OUT "nt <- '$allele'\n"; #print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n"; print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n";