Mercurial > repos > geert-vandeweyer > dc_genotyper
comparison DC_Genotyper.pl @ 16:7e4a9ee69f0b draft
Uploaded
author | geert-vandeweyer |
---|---|
date | Mon, 29 Sep 2014 02:03:50 -0400 |
parents | 8cc26598eeac |
children | 3ba482a2dd0e |
comparison
equal
deleted
inserted
replaced
15:36cc147395ad | 16:7e4a9ee69f0b |
---|---|
168 | 168 |
169 ## load dbSNP inside target regions into shared structure. | 169 ## load dbSNP inside target regions into shared structure. |
170 ########################################################## | 170 ########################################################## |
171 my %dbsnp :shared; | 171 my %dbsnp :shared; |
172 if ($snpfile ne '') { | 172 if ($snpfile ne '') { |
173 my $bcf = `which bcftools`; | 173 #my $bcf = `which bcftools`; |
174 chomp($bcf); | 174 #chomp($bcf); |
175 if ($bcf ne '') { | 175 #if ($bcf ne '') { |
176 my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt"; | 176 # my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt"; |
177 system("$command"); | 177 # system("$command"); |
178 open IN, "$wd/dbsnp.txt"; | 178 # open IN, "$wd/dbsnp.txt"; |
179 while (<IN>) { | 179 # while (<IN>) { |
180 chomp; | 180 # chomp; |
181 my @p = split(/\t/,$_); | 181 # my @p = split(/\t/,$_); |
182 $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4]; | 182 # $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4]; |
183 } | 183 # } |
184 close IN; | 184 # close IN; |
185 } | 185 #} |
186 else { | 186 #else { |
187 print "WARNING: BCFtools is not in the path. Skipping snp handling.\n"; | 187 # print "WARNING: BCFtools is not in the path. Skipping snp handling.\n"; |
188 $snpfile = ''; | 188 # $snpfile = ''; |
189 system("touch $wd/dbsnp.txt"); | 189 # system("touch $wd/dbsnp.txt"); |
190 } | 190 #} |
191 system("tabix $snpfile -B $opts{'t'} | cut -f 1-5 > $wd/dbsnp.txt"); | |
192 my $lc = `cat $wd/dbsnp.txt | wc -l`; | |
193 chomp($lc); | |
194 if ($lc eq '0') { | |
195 open SNP, ">$wd/dbsnp.txt"; | |
196 ## dummy line on chr zero | |
197 print SNP "chr0\t1\t.\tA\tT\n"; | |
198 close SNP; | |
191 } | 199 } |
192 else { | 200 else { |
193 system("touch $wd/dbsnp.txt"); | 201 open SNP, ">$wd/dbsnp.txt"; |
202 ## dummy line on chr zero | |
203 print SNP "chr0\t1\t.\tA\tT\n"; | |
204 close SNP; | |
194 } | 205 } |
195 | 206 |
196 ## now process the bam file. | 207 ## now process the bam file. |
197 mkdir "$wd/WIGS/"; | 208 mkdir "$wd/WIGS/"; |
198 my $bam :shared; | 209 my $bam :shared; |
276 | 287 |
277 ## CALL SNPs | 288 ## CALL SNPs |
278 ############ | 289 ############ |
279 # create the R script. | 290 # create the R script. |
280 open R, ">$wd/CallSNPs.R"; | 291 open R, ">$wd/CallSNPs.R"; |
292 print R "\n"; | |
281 print R "args <- commandArgs(trailingOnly = TRUE)\n"; | 293 print R "args <- commandArgs(trailingOnly = TRUE)\n"; |
282 print R "counts <- read.table(file=args[1],header=FALSE, as.is=TRUE)\n"; | 294 print R "counts <- read.table(file = args[1],header = FALSE, as.is = TRUE)\n"; |
283 print R "ploidy <- as.integer(args[3])\n"; | 295 print R "ploidy <- as.integer(args[3])\n"; |
284 print R "chr <- args[2]\n"; | 296 print R "chr <- args[2]\n"; |
285 print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n"; | 297 print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n"; |
286 print R "colnames(snps) <- c('chr','pos','ref','alt','id')\n"; | 298 print R "colnames(snps) <- c('chr','pos','id','ref','alt')\n"; |
287 print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n"; | 299 print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n"; |
288 print R "dists <- read.table(file='$wd/allelic_distributions.txt',header=TRUE,as.is=TRUE)\n"; | 300 print R "dists <- read.table(file='".$opts{'a'}."',header=TRUE,as.is=TRUE)\n"; |
289 print R 'rownames(dists) = dists$allele'."\n"; | 301 print R 'rownames(dists) = dists$allele'."\n"; |
290 print R 'dists <- dists[,-1]'."\n"; | 302 print R 'dists <- dists[,-1]'."\n"; |
291 print R "vcf <- c()\n"; | 303 print R "vcf <- c()\n"; |
292 print R "lower <- c()\n"; | 304 print R "lower <- c()\n"; |
293 print R "higher <- c()\n"; | 305 print R "higher <- c()\n"; |
487 | 499 |
488 sub GetDistribution { | 500 sub GetDistribution { |
489 while (defined(my $allele = $alleles->dequeue())) { | 501 while (defined(my $allele = $alleles->dequeue())) { |
490 system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'"); | 502 system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'"); |
491 open OUT, ">$wd/GetDistribution.$allele.R"; | 503 open OUT, ">$wd/GetDistribution.$allele.R"; |
492 print OUT "sample <- '$bam'\n"; | 504 print OUT "\n"; |
493 print OUT "nt <- '$allele'\n"; | 505 print OUT "nt <- '$allele'\n"; |
494 #print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n"; | 506 #print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n"; |
495 print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n"; | 507 print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n"; |
496 print OUT "nr <- length(data)\n"; | 508 print OUT "nr <- length(data)\n"; |
497 print OUT "avg <- mean(data)\n"; | 509 print OUT "avg <- mean(data)\n"; |