annotate DC_Genotyper.pl @ 16:7e4a9ee69f0b draft

Uploaded
author geert-vandeweyer
date Mon, 29 Sep 2014 02:03:50 -0400
parents 8cc26598eeac
children 3ba482a2dd0e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
2 #############################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
3 ## DEEP COVERAGE GENOTYPER ##
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
4 #############################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
5 # version : 0.0.1
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
6 # Principle:
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
7 # 1. get allele counts on all positions in specified targets (bed) using igvtools. Only SNPs !!
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
8 # 2. remove known dbsnp positions (bcf file)
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
9 # 3. Get distribution of background noise (pcr/sequencing errors), by modelling allele fractions as normal distributions.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
10 # 4. Based on these distributions, check each position for significant change from the reference allele (based on allele fraction)
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
11 # 5. For abberant positions, check each alternate allele to see if it passes the background signal.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
12 # 6. Generate VCF file.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
13
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
14
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
15 ##################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
16 ## LOAD MODULES ##
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
17 ##################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
18 use threads;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
19 use threads::shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
20 use Thread::Queue;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
21 use Getopt::Std;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
22
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
23 ####################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
24 ## get paramaters ##
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
25 ####################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
26 # t: target file
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
27 # b: bam file
12
8cc26598eeac Uploaded
geert-vandeweyer
parents: 11
diff changeset
28 # R: reference genome files for twobit and IGV.
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
29 # p: number of threads.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
30 # s: dbsnp file
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
31 # m: minimal coverage (defaults 400x)
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
32 # P: ploidy
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
33 # a: outfile for allele distributions
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
34 # v: vcf file output.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
35 getopts('t:b:R:p:s:m:P:v:a:', \%opts) ;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
36
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
37 ## variables
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
38 my $twobit :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
39 my $igvgenome :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
40 if (!defined($opts{'R'})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
41 die("Reference Genomes not specified\n");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
42 }
12
8cc26598eeac Uploaded
geert-vandeweyer
parents: 11
diff changeset
43 my @refgenomes = split(",",$opts{'R'});
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
44 if (!-e $refgenomes[0]) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
45 die("'$refgenomes[0]' is not a valid file path.");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
46 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
47 else {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
48 $twobit = $refgenomes[0];
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
49 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
50 if (!-e $refgenomes[1]) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
51 die("'$refgenomes[1]' is not a valid file path.");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
52 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
53 else {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
54 $igvgenome = $refgenomes[1];
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
55 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
56
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
57
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
58 my $mincov :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
59 $mincov = 320;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
60 if (defined($opts{'m'})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
61 $mincov = $opts{'m'};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
62 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
63
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
64 my $ploidy :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
65 if (defined($opts{'P'}) && $opts{'P'} =~ m/^\d+$/) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
66 $ploidy = $opts{'P'};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
67 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
68 else {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
69 die("Ploidy (-P) was not specified or not an integer\n");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
70 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
71
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
72
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
73 if (defined($opts{'v'})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
74 $outfile = $opts{'v'};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
75 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
76 else {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
77 die("No output vcf-file specified.\n");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
78 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
79 if (!defined($opts{'a'})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
80 die("No output file specified for distribution details\n");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
81 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
82 ## create working dir.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
83 my $rand = int(rand(10000));
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
84 while (-d "/tmp/DC_Genotyper_$rand") {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
85 $rand = int(rand(10000));
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
86 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
87 my $wd :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
88 $wd = "/tmp/DC_Genotyper_$rand";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
89 system("mkdir '$wd'");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
90
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
91
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
92 my $snpfile :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
93 my $hassnp :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
94 $hassnp = 'NoDbSNP';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
95 $snpfile = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
96 if (defined($opts{'s'})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
97 $snpfile = $opts{'s'};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
98 if (!-e $snpfile) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
99 die("'$snpfile' is not a valid file path.");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
100 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
101
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
102 my $mime = `file $snpfile`;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
103 if ($mime !~ m/compressed/) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
104 print "$snpfile is not in compressed format. compressing & indexing the file now.\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
105 #print "... this takes a while\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
106 system("bgzip -c $snpfile > $wd/dbSNP.vcf.bgz");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
107 system("cd $wd/ && tabix -p vcf dbSNP.vcf.bgz");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
108 $snpfile = "$wd/dbSNP.vcf.bgz";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
109 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
110 elsif (!-e "$snpfile.tbi") {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
111 print "tabix index file is missing for '$snpfile'. creating now.\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
112 ## check if I can write it out for future use
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
113 $snpfile =~ m/(.*)([^\/]+)$/;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
114 my $d = $1;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
115 if (-w $d) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
116 open OUT, ">$d/lock";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
117 flock(OUT,2);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
118 system("cd $d && tabix -p vcf $snpfile");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
119 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
120 system("rm $d/lock");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
121 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
122 else {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
123 system("cp $snpfile /$wd/dbSNP.vcf.bgz");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
124 system("cd $wd/ && tabix -p vcf dbSNP.vcf.bgz");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
125 $snpfile = "$wd/dbSNP.vcf.bgz";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
126 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
127 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
128 $hassnp = 'WithDbSNP';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
129 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
130
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
131
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
132 ## 1. Get FASTA and prepare output hashes:
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
133 my $targets_one = Thread::Queue->new();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
134 my $targets_two = Thread::Queue->new();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
135 my $targets_three = Thread::Queue->new();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
136 open IN, $opts{'t'} or die("Could not open $opts{'t'} file for reading");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
137 if (-d "$wd/Fasta/") {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
138 system("rm $wd/Fasta/*");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
139 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
140 else {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
141 system("mkdir $wd/Fasta");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
142 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
143 ## create the threads.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
144 for (my $i = 1; $i<= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
145 ${'thr'.$i} = threads->create('FetchFasta');
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
146 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
147
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
148 ## enqueue the targets.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
149 my %thash;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
150 while (<IN>) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
151 chomp;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
152 my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
153 $targets_one->enqueue($_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
154 $targets_two->enqueue($_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
155 $targets_three->enqueue($_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
156 $thash{$chr}{$start} = $stop;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
157 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
158 close IN;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
159
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
160 ## end the threads.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
161 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
162 $targets_one->enqueue(undef);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
163 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
164
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
165 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
166 ${'thr'.$i}->join();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
167 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
168
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
169 ## load dbSNP inside target regions into shared structure.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
170 ##########################################################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
171 my %dbsnp :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
172 if ($snpfile ne '') {
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
173 #my $bcf = `which bcftools`;
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
174 #chomp($bcf);
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
175 #if ($bcf ne '') {
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
176 # my $command = "bcftools query -f '\%CHROM\\t\%POS\\t\%REF\\t\%ALT\\t\%ID\\n' -R '".$opts{'t'}."' '$snpfile' > $wd/dbsnp.txt";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
177 # system("$command");
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
178 # open IN, "$wd/dbsnp.txt";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
179 # while (<IN>) {
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
180 # chomp;
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
181 # my @p = split(/\t/,$_);
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
182 # $dbsnp{$p[0].'-'.$p[1]} = $p[2].'-'.$p[3].'-'.$p[4];
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
183 # }
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
184 # close IN;
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
185 #}
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
186 #else {
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
187 # print "WARNING: BCFtools is not in the path. Skipping snp handling.\n";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
188 # $snpfile = '';
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
189 # system("touch $wd/dbsnp.txt");
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
190 #}
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
191 system("tabix $snpfile -B $opts{'t'} | cut -f 1-5 > $wd/dbsnp.txt");
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
192 my $lc = `cat $wd/dbsnp.txt | wc -l`;
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
193 chomp($lc);
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
194 if ($lc eq '0') {
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
195 open SNP, ">$wd/dbsnp.txt";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
196 ## dummy line on chr zero
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
197 print SNP "chr0\t1\t.\tA\tT\n";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
198 close SNP;
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
199 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
200 else {
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
201 open SNP, ">$wd/dbsnp.txt";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
202 ## dummy line on chr zero
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
203 print SNP "chr0\t1\t.\tA\tT\n";
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
204 close SNP;
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
205 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
206
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
207 ## now process the bam file.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
208 mkdir "$wd/WIGS/";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
209 my $bam :shared;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
210 $bam = $opts{'b'};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
211 my $bai = $bam.".bai";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
212 if (!-e $bai) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
213 print "BAI ($bai) missing for $bam : creating\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
214 system("samtools index $bam");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
215 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
216
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
217 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
218 ${'thr'.$i} = threads->create('CountAlleles');
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
219 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
220 ## end the threads.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
221 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
222 $targets_two->enqueue(undef);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
223 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
224
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
225 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
226 ${'thr'.$i}->join();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
227 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
228
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
229 ## generate the distributions.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
230 ##############################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
231 my $alleles = Thread::Queue->new();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
232 my %all = ('A' => 1,'C' => 2,'G' => 3, 'T' => 4);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
233 foreach(keys(%all)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
234 $alleles->enqueue($_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
235 my $a = $_;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
236 foreach(keys(%all)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
237 if ($_ eq $a) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
238 next;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
239 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
240 $alleles->enqueue($a.'-'.$_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
241 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
242 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
243 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
244 ${'thr'.$i} = threads->create('GetDistribution');
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
245 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
246 ## end the threads.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
247 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
248 $alleles->enqueue(undef);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
249 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
250
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
251 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
252 ${'thr'.$i}->join();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
253 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
254
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
255 ## group distributions into one file
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
256 ####################################
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
257 my %map =('A' => 2,'C' => 3,'G' => 4, 'T' => 5);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
258 open OUT, ">".$opts{'a'};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
259 print OUT "allele\tavg\tsd\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
260 foreach(keys(%map)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
261 my $r = $_;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
262 my $f = "$wd/model.$r.$mincov"."x.$hassnp.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
263 open IN, "$f";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
264 my $a = <IN>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
265 chomp($a);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
266 #$dists{$r}{'avg'} = $a;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
267 my $s = <IN>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
268 chomp($s);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
269 #$dists{$r}{'sd'} = $s;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
270 close IN;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
271 print OUT "$r\t$a\t$s\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
272 foreach(keys(%map)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
273 if ($_ eq $r) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
274 next;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
275 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
276 my $f = "$wd/model.$r-$_.$mincov"."x.$hassnp.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
277 open IN, "$f";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
278 my $a = <IN>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
279 chomp($a);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
280 my $s = <IN>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
281 chomp($s);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
282 close IN;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
283 print OUT "$r-$_\t$a\t$s\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
284 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
285 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
286 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
287
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
288 ## CALL SNPs
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
289 ############
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
290 # create the R script.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
291 open R, ">$wd/CallSNPs.R";
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
292 print R "\n";
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
293 print R "args <- commandArgs(trailingOnly = TRUE)\n";
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
294 print R "counts <- read.table(file = args[1],header = FALSE, as.is = TRUE)\n";
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
295 print R "ploidy <- as.integer(args[3])\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
296 print R "chr <- args[2]\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
297 print R "snps <- read.table(file=args[5],header=FALSE,as.is=TRUE)\n";
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
298 print R "colnames(snps) <- c('chr','pos','id','ref','alt')\n";
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
299 print R "colnames(counts) <- c('pos','ref','A','C','G','T','TotalDepth')\n";
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
300 print R "dists <- read.table(file='".$opts{'a'}."',header=TRUE,as.is=TRUE)\n";
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
301 print R 'rownames(dists) = dists$allele'."\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
302 print R 'dists <- dists[,-1]'."\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
303 print R "vcf <- c()\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
304 print R "lower <- c()\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
305 print R "higher <- c()\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
306 print R "for (i in 1:(ploidy)) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
307 print R " lower[length(lower)+1] <- (2*i-1)/(2*ploidy)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
308 print R " higher[length(higher)+1] <- (2*i+1)/(2*ploidy)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
309 print R "}\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
310 print R "for (i in 1:nrow(counts)) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
311 print R " if (counts[i,'TotalDepth'] == 0) next\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
312 print R " # significantly different from reference?\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
313 print R " z <- ((counts[i,counts[i,'ref']]/counts[i,'TotalDepth']) - dists[counts[i,'ref'],'avg']) / dists[counts[i,'ref'],'sd']\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
314 print R " if (abs(z) > 3) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
315 print R " # test all alterate alleles to see which one is significant.\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
316 print R " for (j in c('A','C','G','T')) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
317 print R " if (j == counts[i,'ref']) next\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
318 print R " z <- ((counts[i,j]/counts[i,'TotalDepth']) - dists[paste(counts[i,'ref'],'-',j,sep=''),'avg']) / dists[paste(counts[i,'ref'],'-',j,sep=''),'sd']\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
319 print R " if (abs(z) > 3){\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
320 print R " filter <- 'PASS'\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
321 print R " phred <- round(-10*log(pnorm(-abs(z))),digits=0)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
322 print R " if (phred > 9999) phred <- 9999\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
323 print R " frac <- counts[i,j]/counts[i,'TotalDepth']\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
324 print R " for (k in 1:ploidy) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
325 print R " if (frac >= lower[k] && frac < higher[k]) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
326 print R " sample <- paste(paste(paste(rep(0,(ploidy-k)),sep='',collapse='/'),paste(rep(1,k),sep='',collapse='/'),sep='/',collapse=''),':',counts[i,counts[i,'ref']],',',counts[i,j],sep='',collapse='')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
327 print R " af <- k/ploidy\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
328 print R " break\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
329 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
330 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
331 print R " if (frac < lower[1]) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
332 print R " sample <- paste(paste(paste(rep(0,(ploidy-1)),sep='',collapse='/'),paste(rep(1,1),sep='',collapse='/'),sep='/',collapse=''),':',counts[i,counts[i,'ref']],',',counts[i,j],sep='',collapse='')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
333 print R " af <- 1/ploidy\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
334 print R " filter <- 'LowFraction'\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
335 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
336 print R " if (counts[i,'TotalDepth'] < $mincov) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
337 print R " filter <- 'LowCoverage'\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
338 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
339 print R " info <- paste('DP=',counts[i,'TotalDepth'],';AF=',round(af,digits=5),';AR=',round(frac,digits=5),sep='')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
340 print R " snpids <- which(snps\$chr == chr & snps\$pos == counts[i,'pos'])\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
341 print R " id <- '.'\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
342 print R " if (length(snpids) > 0) id <- snps[snpids[1],'id']\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
343 print R " vcf[length(vcf)+1] <- paste(chr,counts[i,'pos'],id,counts[i,'ref'],j,phred,filter,info,'GT:AD',sample,sep='\\t',collapse='')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
344 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
345 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
346 print R " }\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
347 print R "}\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
348 print R "if (length(vcf) > 0) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
349 print R " write(file=args[4],paste(vcf,sep='\\n'))\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
350 print R "}\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
351 close R;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
352 system("mkdir $wd/VCF/");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
353 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
354 ${'thr'.$i} = threads->create('CallSNPs');
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
355 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
356 ## end the threads.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
357 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
358 $targets_three->enqueue(undef);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
359 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
360
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
361 for (my $i = 1; $i <= $opts{'p'}; $i++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
362 ${'thr'.$i}->join();
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
363 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
364
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
365 ## BUILD FINAL VCF
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
366 open OUT, ">$outfile";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
367 print OUT "##fileformat=VCFv4.1\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
368 print OUT "##source=High_Ploidy_Genotyper_v.0.1\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
369 print OUT "##genome_reference=$twobit\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
370 if ($snpfile ne '') {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
371 print OUT "##SNP_file=$snpfile\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
372 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
373 foreach(keys(%thash)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
374 print OUT "##contig=<ID=$_,assembly=hg19,species=\"Homo Sapiens\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
375 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
376 print OUT "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
377 print OUT "##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
378 print OUT "##INFO=<ID=AR,Number=1,Type=Float,Description=\"Allelic Ratio\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
379 print OUT "##FILTER=<ID=LowFraction,Description=\"Allelic Fraction under 1/2*$ploidy\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
380 print OUT "##FILTER=<ID=LowCoverage,Description=\"Total Depth is lower than threshold of $mincov\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
381 print OUT "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
382 print OUT "##FORMAT=<ID=AD,Number=2,type=Integer,Description,\"Allelic Depth\">\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
383 print OUT "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
384 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
385 @i = ( 1 .. 22,'X','Y','M' );
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
386 foreach(@i) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
387 my $chr = "chr$_";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
388 foreach(sort {$a <=> $b} keys(%{$thash{$chr}})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
389 my $v = "$wd/VCF/$chr.$_-".$thash{$chr}{$_}.".vcf";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
390 if (-e $v) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
391 system("cat '$v' >> '$outfile'");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
392 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
393 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
394 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
395
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
396 ## clean up
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
397 system("rm -Rf '$wd'");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
398
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
399 sub FetchFasta {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
400 while(defined(my $line = $targets_one->dequeue())) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
401 my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$line);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
402 # 2bit is zero based, non-including => decrease start by one
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
403 $startposition = $start - 1;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
404 my $command = "twoBitToFa -seq=$chr -start=$startposition -end=$stop -noMask $twobit $wd/Fasta/$chr-$start-$stop.fasta";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
405 system($command);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
406 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
407 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
408
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
409 sub CountAlleles {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
410 # local version of hashes
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
411 my $snp = \%dbsnp;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
412 my %counts;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
413 $counts{'A'} = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
414 $counts{'C'} = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
415 $counts{'G'} = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
416 $counts{'T'} = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
417 my %map =('A' => 1,'C' => 2,'G' => 3, 'T' => 4);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
418 my %options;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
419 foreach(keys(%map)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
420 my $r = $_;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
421 foreach(keys(%map)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
422 if ($_ eq $r) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
423 next;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
424 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
425 $options{$r.'-'.$_} = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
426 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
427 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
428 while (defined(my $line = $targets_two->dequeue())) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
429 $out = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
430 my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$line);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
431 ## get reference alleles
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
432 my %ref_alleles;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
433 open FASTA, "$wd/Fasta/$chr-$start-$stop.fasta";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
434 my $head = <FASTA>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
435 my $seq = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
436 while (<FASTA>) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
437 chomp;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
438 $seq .= $_;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
439 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
440 close FASTA;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
441 # this generates a hash of the reference alleles once, instead of substr-calls in every bam, on every iteration.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
442 for (my $pos = 0; $pos < length($seq); $pos++) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
443 $ref_alleles{($pos+$start)} = substr($seq,$pos,1);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
444 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
445 ## get counts.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
446 my $target = "$chr:$start-$stop";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
447 my $command = "igvtools count -w 1 --bases --query '$target' '$bam' '$wd/WIGS/$chr-$start-$stop.wig' '$igvgenome' > /dev/null 2>&1";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
448 system($command);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
449 open WIG, "$wd/WIGS/$chr-$start-$stop.wig";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
450 my $h = <WIG>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
451 $h = <WIG>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
452 $h = <WIG>;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
453 my $target_counts = '';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
454 while (<WIG>) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
455 chomp;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
456 #my ($pos, $a, $c, $g, $t , $n) = split(/\t/,$_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
457 my @p = split(/\t/,$_);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
458 my $s = $p[1] + $p[2] + $p[3] + $p[4];
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
459 $target_counts .= "$p[0]\t$ref_alleles{$p[0]}\t$p[1]\t$p[2]\t$p[3]\t$p[4]\t$s\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
460 ## skip positions with coverage < minimal coverage, and positions in dbsnp if specified (if not specified, snp hash is empty).
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
461 if ($s > $mincov && !defined($snp->{$chr.'-'.$p[0]})) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
462 ## for model of 'non-reference'
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
463 my $frac = $p[$map{$ref_alleles{$p[0]}}] / $s;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
464 $counts{$ref_alleles{$p[0]}} .= $frac.',';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
465 $out .= "$target\t$p[0]\t$ref_alleles{$p[0]}\t$p[1]\t$p[2]\t$p[3]\t$p[4]\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
466 ## for each of the options background models
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
467 foreach(keys(%map)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
468 if ($_ eq $ref_alleles{$p[0]}) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
469 next;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
470 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
471 $options{$ref_alleles{$p[0]}.'-'.$_} .= ($p[$map{$_}] / $s) .',';
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
472 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
473
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
474 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
475 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
476 close WIG;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
477 open OUT, ">>$wd/allcounts.$mincov"."x.$hassnp.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
478 flock(OUT, 2);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
479 print OUT $out;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
480 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
481 open OUT, ">$wd/WIGS/$chr.$start-$stop.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
482 print OUT $target_counts;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
483 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
484
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
485 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
486 foreach(keys(%counts)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
487 open OUT, ">>$wd/counts_$_.$mincov"."x.$hassnp.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
488 flock(OUT,2);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
489 print OUT $counts{$_};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
490 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
491 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
492 foreach(keys(%options)) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
493 open OUT, ">>$wd/counts_$_.$mincov"."x.$hassnp.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
494 flock(OUT,2);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
495 print OUT $options{$_};
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
496 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
497 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
498 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
499
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
500 sub GetDistribution {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
501 while (defined(my $allele = $alleles->dequeue())) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
502 system("sed -i 's/.\$//' '$wd/counts_$allele.$mincov"."x.$hassnp.txt'");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
503 open OUT, ">$wd/GetDistribution.$allele.R";
16
7e4a9ee69f0b Uploaded
geert-vandeweyer
parents: 12
diff changeset
504 print OUT "\n";
11
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
505 print OUT "nt <- '$allele'\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
506 #print OUT "pdf(file='$wd/Distribution.$allele.$mincov"."x.$hassnp.pdf',paper='a4')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
507 print OUT "data <- scan(file='$wd/counts_$allele.$mincov"."x.$hassnp.txt',sep=',')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
508 print OUT "nr <- length(data)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
509 print OUT "avg <- mean(data)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
510 print OUT "sdd <- sd(data)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
511 #print OUT "if (avg > 0.5) {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
512 #print OUT " x <- seq(0.8,1,length=1000)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
513 #print OUT " y <- dnorm(x,mean=avg,sd=sdd)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
514 #print OUT " plot(x,y,main='Distribution in sample $bam for nt $allele',xlab='Allelic Ratio',type='l',lwd=1)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
515 #print OUT " abline(v=(avg-3*sdd),col='red')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
516 #print OUT " text(0.81,max(y-0.5),paste(c('avg: ',avg,'\\nsd: ',sdd,'\\nnrDataPoints:', nr,'\\n$hassnp\\nMin.Cov:',$mincov),sep=' ',collapse=''),adj=c(0,1))\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
517 #print OUT "} else {\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
518 #print OUT " x <- seq(0,0.3,length=1000)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
519 #print OUT " y <- dnorm(x,mean=avg,sd=sdd)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
520 #print OUT " plot(x,y,main='Distribution in sample $bam for nt $allele',xlab='Allelic Ratio',type='l',lwd=1)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
521 #print OUT " abline(v=(avg+3*sdd),col='red')\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
522 #print OUT " text(0.2,max(y-0.5),paste(c('avg: ',avg,'\\nsd: ',sdd,'\\nnrDataPoints:', nr,'\\n$hassnp\\nMin.Cov:',$mincov),sep=' ',collapse=''),adj=c(0,1))\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
523 #print OUT "}\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
524 #print OUT "dev.off()\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
525 print OUT "write(c(avg,sdd),file='$wd/model.$allele.$mincov"."x.$hassnp.txt',ncolumns=1)\n";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
526 close OUT;
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
527 system("cd $wd && Rscript GetDistribution.$allele.R >/dev/null 2>&1");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
528 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
529 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
530
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
531
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
532 sub CallSNPs {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
533 while (defined(my $line = $targets_three->dequeue())) {
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
534 # split.
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
535 my ($chr,$start,$stop,$name,$score,$strand) = split(/\t/,$line);
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
536 my $file = "$wd/WIGS/$chr.$start-$stop.txt";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
537 my $ofile = "$wd/VCF/$chr.$start-$stop.vcf";
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
538 system("cd $wd && Rscript CallSNPs.R '$file' '$chr' '$ploidy' '$ofile' '$wd/dbsnp.txt'");
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
539 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
540
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
541 }
845a87ad254a re-upload, accidentally removed some files
geert-vandeweyer
parents:
diff changeset
542