comparison DC_Genotyper.pl @ 16:7e4a9ee69f0b draft

Uploaded
author geert-vandeweyer
date Mon, 29 Sep 2014 02:03:50 -0400
parents 8cc26598eeac
children 3ba482a2dd0e
comparison
equal deleted inserted replaced
15:36cc147395ad 16:7e4a9ee69f0b
168 168
169 ## load dbSNP inside target regions into shared structure. 169 ## load dbSNP inside target regions into shared structure.
170 ########################################################## 170 ##########################################################
171 my %dbsnp :shared; 171 my %dbsnp :shared;
172 if ($snpfile ne '') { 172 if ($snpfile ne '') {
173 my $bcf = `which bcftools`; 173 #my $bcf = `which bcftools`;
174 chomp($bcf); 174 #chomp($bcf);
175 if ($bcf ne '') { 175 #if ($bcf ne '') {
176 my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt"; 176 # my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt";
177 system("$command"); 177 # system("$command");
178 open IN, "$wd/dbsnp.txt"; 178 # open IN, "$wd/dbsnp.txt";
179 while (<IN>) { 179 # while (<IN>) {
180 chomp; 180 # chomp;
181 my @p = split(/\t/,$_); 181 # my @p = split(/\t/,$_);
182 $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4]; 182 # $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4];
183 } 183 # }
184 close IN; 184 # close IN;
185 } 185 #}
186 else { 186 #else {
187 print "WARNING: BCFtools is not in the path. Skipping snp handling.\n"; 187 # print "WARNING: BCFtools is not in the path. Skipping snp handling.\n";
188 $snpfile = ''; 188 # $snpfile = '';
189 system("touch $wd/dbsnp.txt"); 189 # system("touch $wd/dbsnp.txt");
190 } 190 #}
191 system("tabix $snpfile -B $opts{'t'} | cut -f 1-5 > $wd/dbsnp.txt");
192 my $lc = `cat $wd/dbsnp.txt | wc -l`;
193 chomp($lc);
194 if ($lc eq '0') {
195 open SNP, ">$wd/dbsnp.txt";
196 ## dummy line on chr zero
197 print SNP "chr0\t1\t.\tA\tT\n";
198 close SNP;
191 } 199 }
192 else { 200 else {
193 system("touch $wd/dbsnp.txt"); 201 open SNP, ">$wd/dbsnp.txt";
202 ## dummy line on chr zero
203 print SNP "chr0\t1\t.\tA\tT\n";
204 close SNP;
194 } 205 }
195 206
196 ## now process the bam file. 207 ## now process the bam file.
197 mkdir "$wd/WIGS/"; 208 mkdir "$wd/WIGS/";
198 my $bam :shared; 209 my $bam :shared;
276 287
277 ## CALL SNPs 288 ## CALL SNPs
278 ############ 289 ############
279 # create the R script. 290 # create the R script.
280 open R, ">$wd/CallSNPs.R"; 291 open R, ">$wd/CallSNPs.R";
292 print R "\n";
281 print R "args <- commandArgs(trailingOnly = TRUE)\n"; 293 print R "args <- commandArgs(trailingOnly = TRUE)\n";
282 print R "counts <- read.table(file=args[1],header=FALSE, as.is=TRUE)\n"; 294 print R "counts <- read.table(file = args[1],header = FALSE, as.is = TRUE)\n";
283 print R "ploidy <- as.integer(args[3])\n"; 295 print R "ploidy <- as.integer(args[3])\n";
284 print R "chr <- args[2]\n"; 296 print R "chr <- args[2]\n";
285 print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n"; 297 print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n";
286 print R "colnames(snps) <- c('chr','pos','ref','alt','id')\n"; 298 print R "colnames(snps) <- c('chr','pos','id','ref','alt')\n";
287 print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n"; 299 print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n";
288 print R "dists <- read.table(file='$wd/allelic_distributions.txt',header=TRUE,as.is=TRUE)\n"; 300 print R "dists <- read.table(file='".$opts{'a'}."',header=TRUE,as.is=TRUE)\n";
289 print R 'rownames(dists) = dists$allele'."\n"; 301 print R 'rownames(dists) = dists$allele'."\n";
290 print R 'dists <- dists[,-1]'."\n"; 302 print R 'dists <- dists[,-1]'."\n";
291 print R "vcf <- c()\n"; 303 print R "vcf <- c()\n";
292 print R "lower <- c()\n"; 304 print R "lower <- c()\n";
293 print R "higher <- c()\n"; 305 print R "higher <- c()\n";
487 499
488 sub GetDistribution { 500 sub GetDistribution {
489 while (defined(my $allele = $alleles->dequeue())) { 501 while (defined(my $allele = $alleles->dequeue())) {
490 system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'"); 502 system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'");
491 open OUT, ">$wd/GetDistribution.$allele.R"; 503 open OUT, ">$wd/GetDistribution.$allele.R";
492 print OUT "sample <- '$bam'\n"; 504 print OUT "\n";
493 print OUT "nt <- '$allele'\n"; 505 print OUT "nt <- '$allele'\n";
494 #print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n"; 506 #print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n";
495 print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n"; 507 print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n";
496 print OUT "nr <- length(data)\n"; 508 print OUT "nr <- length(data)\n";
497 print OUT "avg <- mean(data)\n"; 509 print OUT "avg <- mean(data)\n";