annotate [APliBio]Nebula tools suite/Nebula/CreateControlSubSet/createControlSubSet.pl @ 0:2ec3ba0e9e70 draft

Uploaded
author alermine
date Thu, 25 Oct 2012 08:18:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1 #:t:::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
2 #:t::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
3 #:::::::::::::z;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
4 #::::::::::::i@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
5 #::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@$@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
6 #:::::::::::3@@@@@@@@@@@@@@@@@@@@@@@@@B@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
7 #::::::::::3@@@@@@@@@@@@@@@@@@@@@BEEESSE5EEEEBBM@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
8 #::::::::::3@@@@@@@@@@@@@@@@@@@@BEEEEEE35EE55E2355E5SBMB@@@@@@@@@@@@@@@@@$
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
9 #::::::::::@@@@@@@@@@@@@@@@@@@EEEE55533t3tttt::::::!!!!7755E755SBBMMM@@@MM
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
10 #::::::::::3@@@@@@@@@@@@@@@@@@EEEE2t3ttttt:::::::::::::::::::::::!7?5225EE
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
11 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEE31t::::::::::::::::::::::::::::::::3E5@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
12 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEEtt:::::::::::::::::::::::::::::::::353
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
13 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEE1ttz::::::::::::::::::::::::::::::::35
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
14 #:::::::::::@@@@@@@@@@@@@@@@@@EEEEEEEtz1::::::::::::::::::::::::::::::::t:
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
15 #:::::::::!3@@@@@@@@@@@@@@@@@@@EEEEEttt::::::::::::::::::::::::::::::::;zz
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
16 #::::::::::@@@@@@@@@@@@@@@@@@@@EEEEEttt:::::z;z:::::::::::::::::::::::::13
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
17 #::::::::::3B@@@@@@@@@@@@@@@@@@EEEEEEE3tt:czzztti;:::::::::::::::::::::::3
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
18 #::::ttt::::3@@@@@@@@@@@@@@@@EEEEE5EE25Ezt1EEEz5Etzzz;;;;:::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
19 #:::::::::::I9@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEE@@@@@@@@@@@@@@Ez;:::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
20 #:::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ez::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
21 #::::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@BE5EBB@@@@@@@@@@@@@@@EEE:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
22 #:::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@E1::35@@@@@@@@@@ME3MMME2::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
23 #:::::::::::::::?@@@@@@@@@@@@@@@@@@M@@@@@@@EE:::::3SB@@BBESEEt::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
24 #::::::::::::::::J$@@@@@@@B@@@@@@@@@@@@@@@@EE:::::::!35E33t:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
25 #:::::::::::::::::3@E@@@EE5EESE5EESE@@@@@@@Et::::::::::::tz:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
26 #:::::::::::::::::J@E$@EEE5133555SE@@@@@@@@Et:::::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
27 #::::::::::::::::::E@E@EEEEtt3523EEE@@@@@@@E::::::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
28 #:t::::::::::::::::JEE3@@@EEEEEEEEEE@@@@@@@E:::::::::t;:::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
29 #:t:::::::::::::::::!5ES@EEEEEEEEES@@@@@@@@@E;:::;;;:3Ez::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
30 #:t::::::::::::::::::::JE@@EEEEEEE@@@@@@@@@@@@@@@@ME!:::;:::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
31 #:tz::::::::::::::::::::JE@@@EEEE@@@@@@@@@@@@@@EE!:::::::t::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
32 #:t::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@ESBE::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
33 #:::::::::::::::::::::::::Q@@@@@@@@@@@@@@@@EE3EE;:::::zzzz::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
34 #:::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@@@@NN@@@@@@Ez:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
35 #:zt:::::::::::::::::::::::3@@@@EE@@@@@@@@@@EEEEt::;z113E5t:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
36 #::tt:::::::::::::::::::::::3@@@E@@@@@@@@@@@@@@@@BEt::::::::::::::::t:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
37 #:tt:t:::::::::::::::::::::::?S@@@@@@@@@@@BBEEE51!::::::::::::::zzzEt:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
38 #::::::::::::::::::::::::::::::3Q@@@@@@@BEEEEEt:::::::::::::;zz@@@EE::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
39 #::::::::::::::::::::::::::::::::75B@@@@@EEEtt;:::::::::;zz@@@@BEEEtz:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
40 #::::::::::::::::::::::::::::::::::::?9@@@@@@@@@@@E2Ezg@@@@@B@@@EEEE1t::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
41 #:::::::::::::::::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@E@EEEEEEEzzz::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
42 #::::::::::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@EEEEEEE5ttttt
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
43 #:::::::::::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEEEEEEEtzt
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
44 #::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@E@@EEEEEEEEEEEE@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
45 #::::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE3EEEE@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
46 #:::::::::::::::::::::;;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEt33@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
47 #:::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E@@@@@@EEEtg@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
48 #::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
49 #:::::::::::::@@@@@@@@@@@@@@@@@$@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
50 #::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
51 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
52 # Copyleft ↄ⃝ 2012 Institut Curie
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
53 # Author(s): Valentina Boeva, Alban Lermine (Institut Curie) 2012
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
54 # Contact: valentina.boeva@curie.fr, alban.lermine@curie.fr
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
55 # This software is distributed under the terms of the GNU General
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
56 # Public License, either Version 2, June 1991 or Version 3, June 2007.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
57
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
58 #!/usr/bin/perl
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
59
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
60 #filter out duplicates from SAMPLE (optional) and create a control dataset w/o duplicates with the same number of reads as in the SAMPLE
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
61
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
62 use strict;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
63 use warnings;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
64 use diagnostics;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
65
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Help text shown when the script is run without arguments or with -h.
# It lists every flag the argument parser recognises, including the
# optional -s (deduplicated sample output) and -h flags.
my $usage = qq{
$0

-----------------------------
mandatory parameters:

-f CHiP_file
-c control_file
-t type [bam, sam, eland]
-o output file
-----------------------------
optional parameters:

-s output file for the ChIP sample without duplicates
-h print this help message and exit
};
81
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# ---------------------------------------------------------------------------
# Command-line handling: print the usage when called without arguments,
# collect the flag values, then check that every mandatory value is present
# and that the requested file type is one the script can read.
# ---------------------------------------------------------------------------
if (scalar(@ARGV) == 0) {
    print $usage;
    exit(0);
}

## mandatory arguments

my $filename        = "";    # -f : ChIP (sample) reads
my $output_fname    = "";    # -o : where the control subset is written

my $controlFilename = "";    # -c : control reads
my $type            = "";    # -t : bam, sam or eland

## optional arguments

my $sampleOutput    = "";    # -s : where the deduplicated sample is written

# Directory that contains the samtools executable; the BAM readers run
# "$samtools_bin_dir/samtools".  The value comes from the SAMTOOLS_BIN_DIR
# environment variable when it is set, otherwise from the first directory
# on PATH that holds an executable named "samtools".
# NOTE(review): the original assignment had no right-hand side; the
# environment variable name is introduced here -- confirm it suits the
# deployment.
my $samtools_bin_dir = defined $ENV{SAMTOOLS_BIN_DIR} ? $ENV{SAMTOOLS_BIN_DIR} : "";
if ($samtools_bin_dir eq "") {
    require File::Spec;
    for my $dir (File::Spec->path()) {
        if (-x File::Spec->catfile($dir, "samtools")) {
            $samtools_bin_dir = $dir;
            last;
        }
    }
}

## parse command line arguments

# Flags that take a value, mapped to the variable that receives it.
my %target_of = (
    '-f' => \$filename,
    '-c' => \$controlFilename,
    '-t' => \$type,
    '-o' => \$output_fname,
    '-s' => \$sampleOutput,
);

while (scalar(@ARGV) > 0) {
    my $this_arg = shift @ARGV;
    if ($this_arg eq '-h') {
        print "$usage\n";
        exit;
    }
    elsif (exists $target_of{$this_arg}) {
        # A flag given as the very last argument has no value to consume.
        die "missing value for $this_arg\n" if (scalar(@ARGV) == 0);
        ${ $target_of{$this_arg} } = shift @ARGV;
    }
    elsif ($this_arg =~ m/^-/) {
        # Unknown flags are reported but do not stop the run.
        print "unknown flag: $this_arg\n";
    }
}

if ($filename eq "") {
    die "you should specify chip file\n";
}
if ($controlFilename eq "") {
    die "you should specify control file\n";
}
if ($type eq "") {
    die "you should specify file type (bam, sam or eland)\n";
}
if ($output_fname eq "") {
    die "you should specify output filename\n";
}

# An unrecognised type would make the readers skip both inputs and end in a
# division by zero, so reject it here with a clear message.
unless ($type eq "bam" || $type eq "sam" || $type eq "eland") {
    die "unknown file type '$type' (expected bam, sam or eland)\n";
}

# samtools is only needed to decode BAM input.
if ($type eq "bam" && $samtools_bin_dir eq "") {
    die "cannot find samtools: set SAMTOOLS_BIN_DIR or add samtools to PATH\n";
}
128
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
129
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
print "\n-----------------\n\n";

# ---------------------------------------------------------------------------
# Read the ChIP file and keep one line per distinct read position.
#   %hash      : deduplication key -> first line seen with that key
#   $chipCount : number of distinct keys found in the ChIP file
#   @header    : SAM/BAM header lines (those starting with "@"), in file order
# ---------------------------------------------------------------------------
my %hash;
my $chipCount = 0;
my @header;

my $chip_fh;
if ($type eq "eland" || $type eq "sam") {
    # Three-argument open with low-precedence "or", so a failed open really
    # aborts and the file name is never interpreted as a mode.
    open($chip_fh, "<", $filename) or die "$filename : $!\n";
}
elsif ($type eq "bam") {
    # List-form pipe open: the file name is passed to samtools as a single
    # argument without going through a shell.
    open($chip_fh, "-|", "$samtools_bin_dir/samtools", "view", "-h", $filename)
        or die "$0: can't open ".$filename.":$!\n";
}

if (defined $chip_fh) {
    while (my $line = <$chip_fh>) {
        my $entry;
        if ($type eq "eland") {
            my @fields = split(/\t/, $line);
            # Columns 7-9 (indices 6..8) build the key; skip shorter lines.
            next if (scalar(@fields) < 9);
            $entry = $fields[6].":".$fields[7]."-".$fields[8];
        }
        else {
            if ($line =~ m/^@/) {
                push(@header, $line);
                next;
            }
            my @fields = split(/\t/, $line);
            next if (scalar(@fields) < 10);
            # Key is built from columns 3, 4 and 2 (indices 2, 3, 1).
            $entry = $fields[2].":".$fields[3]."-".$fields[1];
        }
        unless (exists($hash{$entry})) {
            $hash{$entry} = $line;
            $chipCount++;
        }
    }
    # For the samtools pipe, close() also reports a non-zero exit status.
    close($chip_fh)
        or die "$0: error while reading $filename: "
             . ($! ? $! : "reader exited with status " . ($? >> 8)) . "\n";
}
print "ChIP: $chipCount\n";
180
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Optionally write the deduplicated ChIP reads (one line per key in %hash)
# to the file given with -s.  For bam and sam input the collected header
# lines are written first.  Lines are written exactly as they were stored.
if ($sampleOutput ne "") {

    # "or" (not "||") so that a failed open actually aborts the script.
    open(my $sample_out, ">", $sampleOutput) or die "$sampleOutput: $!\n";

    if ($type eq "bam" || $type eq "sam") { #print header
        for my $headerLine (@header) {
            print {$sample_out} $headerLine;
        }
    }
    for my $line (values %hash) {
        print {$sample_out} $line;
    }
    # Buffered write errors (e.g. disk full) only surface at close time.
    close($sample_out) or die "$sampleOutput: $!\n";
}
195
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# ---------------------------------------------------------------------------
# Read the control file the same way as the ChIP file: %hash and @header are
# emptied and refilled, and $controlCount receives the number of distinct
# read positions found in the control.
# ---------------------------------------------------------------------------
%hash   = ();
@header = ();

my $controlCount = 0;

my $control_fh;
if ($type eq "eland" || $type eq "sam") {
    # Three-argument open with low-precedence "or", so a failed open really
    # aborts and the file name is never interpreted as a mode.
    open($control_fh, "<", $controlFilename) or die "$controlFilename : $!\n";
}
elsif ($type eq "bam") {
    # List-form pipe open: the file name is passed to samtools as a single
    # argument without going through a shell.
    open($control_fh, "-|", "$samtools_bin_dir/samtools", "view", "-h", $controlFilename)
        or die "$0: can't open ".$controlFilename.":$!\n";
}

if (defined $control_fh) {
    while (my $line = <$control_fh>) {
        my $entry;
        if ($type eq "eland") {
            my @fields = split(/\t/, $line);
            # Columns 7-9 (indices 6..8) build the key; skip shorter lines.
            next if (scalar(@fields) < 9);
            $entry = $fields[6].":".$fields[7]."-".$fields[8];
        }
        else {
            if ($line =~ m/^@/) {
                push(@header, $line);
                next;
            }
            my @fields = split(/\t/, $line);
            # Same guard as for the ChIP file: ignore truncated records.
            next if (scalar(@fields) < 10);
            # Key is built from columns 3, 4 and 2 (indices 2, 3, 1).
            $entry = $fields[2].":".$fields[3]."-".$fields[1];
        }
        unless (exists($hash{$entry})) {
            $hash{$entry} = $line;
            $controlCount++;
        }
    }
    # For the samtools pipe, close() also reports a non-zero exit status.
    close($control_fh)
        or die "$0: error while reading $controlFilename: "
             . ($! ? $! : "reader exited with status " . ($? >> 8)) . "\n";
}
print "Control: $controlCount\n";

# Without any control read there is nothing to sample from, and the ratio
# below would be a division by zero.
if ($controlCount == 0) {
    die "no usable reads found in control file $controlFilename\n";
}

# Fraction of control reads needed to match the number of ChIP reads.
my $prob = $chipCount/$controlCount;
242
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# ---------------------------------------------------------------------------
# Write a random subset of the deduplicated control reads held in %hash to
# $output_fname.  The subset contains $chipCount reads, or every control
# read when the control holds fewer than that.  For bam and sam input the
# collected header lines are written first.
#
# Reads are chosen by selection sampling in a single pass: each read is kept
# with probability (reads still wanted) / (reads not yet examined).  Every
# read is examined once, so no read can be written twice, and the requested
# number of reads is always reached.
# ---------------------------------------------------------------------------

# "or" (not "||") so that a failed open actually aborts the script.
open(my $control_out, ">", $output_fname) or die "$output_fname: $!\n";

if ($type eq "bam" || $type eq "sam") { #print header
    for my $headerLine (@header) {
        print {$control_out} $headerLine;
    }
}
my $count = 0;

my $remaining = scalar(keys %hash);    # control reads not yet examined
my $wanted    = $chipCount < $remaining ? $chipCount : $remaining;

if ($wanted < $chipCount) {
    warn "control has only $remaining distinct reads, fewer than the $chipCount ChIP reads\n";
}

for my $line (values %hash) {
    last if ($count == $wanted);

    # rand($remaining) is uniform in [0, $remaining); once the reads still
    # wanted equal the reads left, the test is always true.
    if (rand($remaining) < ($wanted - $count)) {
        print {$control_out} $line;
        $count++;
    }
    $remaining--;
}

print "count = $count\n";
# Buffered write errors (e.g. disk full) only surface at close time.
close($control_out) or die "$output_fname: $!\n";
280