comparison getUniqSV.pl @ 0:acc8d8bfeb9a

Uploaded
author jjohnson
date Wed, 08 Feb 2012 16:59:24 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:acc8d8bfeb9a
1 #!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w
2
3 use strict;
4 use Carp;
5 use Getopt::Long;
6 use English;
7 use Pod::Usage;
8 use Data::Dumper;
9 use DBI;
10 use DBD::mysql;
11
12 my $db = 'hg18';
13 my $port = 3306;
14 my $host = '10.4.19.125';
15 my $USER = "sjXuser";
16
17 my ( $help, $man, $version, $usage );
18 my ($in, $normal, $out);
19 my $optionOK = GetOptions(
20 'd|i|in|input=s' => \$in,
21 'g|normal=s' => \$normal,
22 'o|out|output=s' => \$out,
23 'h|help|?' => \$help,
24 'man' => \$man,
25 'usage' => \$usage,
26 'v|version' => \$version,
27 );
28
29 pod2usage(2) if($man);
30 pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" )
31 if($help or $usage);
32 pod2usage( -verbose => 99, -sections => "VERSION") if($version);
33
34 croak "Missing input file" if(!$in);
35 open STDOUT, ">$out" if($out);
36 open my $IN, "<$in" or croak "can't open $in: $OS_ERROR";
37 open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal);
38
39 my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or
40 die "Can't connect to MySQL database: $DBI::errstr\n";
41
42 my @SV;
43 my @normal_SV;
44 if($normal) {
45 while(my $line = <$NORMAL>) {
46 chomp $line;
47 my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
48 next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type));
49 push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
50 }
51 }
52 while( my $line = <$IN> ) {
53 chomp $line;
54 my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
55 next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample
56 push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
57 print STDOUT $line;
58 if($type eq 'DEL') {
59 my $tmp = get_gene_info($chrA, $posA, $posB);
60 print STDOUT "\t", $tmp if($tmp);
61 }
62 else {
63 print STDOUT "\t", get_gene_info($chrA, $posA, $posA);
64 print STDOUT "\t", get_gene_info($chrB, $posB, $posB);
65 }
66 print STDOUT "\n";
67 }
68 close $IN;
69 close STDOUT if($out);
70 $dbh->disconnect;
71 exit(0);
72
73 sub search_sv {
74 my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_;
75
76 foreach my $sv (@{$r_SV}) {
77 return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 &&
78 $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type);
79 return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 &&
80 $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type);
81 return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 &&
82 $sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX');
83 return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 &&
84 $sv->[2] eq $chrA && abs($sv->[3] - $posA) < 100 && $sv->[4] eq $type && $type eq 'CTX');
85 }
86 return;
87 }
88
89 sub get_gene_info {
90 my ($chr, $st, $ed) = @_;
91 $chr = 'chr' . $chr if($chr !~ m/chr/);
92 my $rtn="";
93 my $sth = $dbh->prepare("select distinct geneName
94 from refFlat
95 where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd)
96 order by txStart"
97 );
98 $sth->execute();
99 my $tbl_ref = $sth->fetchall_arrayref();
100 foreach my $r (@{$tbl_ref}) {
101 if($rtn eq '') {
102 $rtn = $r->[0];
103 }
104 else {
105 $rtn = $rtn . ", " . $r->[0];
106 }
107 }
108 return $rtn;
109 }
110 =head1 NAME
111
112 <application name> - <One-line description of application's purpose>
113
114
115 =head1 VERSION
116
117 The initial template usually just has:
118
119 This documentation refers to <application name> version 0.0.1.
120
121
122 =head1 USAGE
123
124 # Brief working invocation example(s) here showing the most common usage(s)
125
126 # This section will be as far as many users ever read,
127 # so make it as educational and exemplary as possible.
128 =head1 REQUIRED ARGUMENTS
129
130 A complete list of every argument that must appear on the command line.
131 when the application is invoked, explaining what each of them does, any
132 restrictions on where each one may appear (i.e., flags that must appear
133 before or after filenames), and how the various arguments and options
134 may interact (e.g., mutual exclusions, required combinations, etc.)
135
136 If all of the application's arguments are optional, this section
137 may be omitted entirely.
138
139 =head1 OPTIONS
140
141 A complete list of every available option with which the application
142 can be invoked, explaining what each does, and listing any restrictions,
143 or interactions.
144
145 If the application has no options, this section may be omitted entirely.
146
147
148 =head1 DESCRIPTION
149
150 A full description of the application and its features.
151 May include numerous subsections (i.e., =head2, =head3, etc.).
152
153
154 =head1 DIAGNOSTICS
155
156 A list of every error and warning message that the application can generate
157 (even the ones that will "never happen"), with a full explanation of each
158 problem, one or more likely causes, and any suggested remedies. If the
159 application generates exit status codes (e.g., under Unix), then list the exit
160 status associated with each error.
161
162 =head1 CONFIGURATION AND ENVIRONMENT
163
164 A full explanation of any configuration system(s) used by the application,
165 including the names and locations of any configuration files, and the
166 meaning of any environment variables or properties that can be set. These
167 descriptions must also include details of any configuration language used.
168
169
170 =head1 DEPENDENCIES
171
172 A list of all the other modules that this module relies upon, including any
173 restrictions on versions, and an indication of whether these required modules are
174 part of the standard Perl distribution, part of the module's distribution,
175 or must be installed separately.
176
177
178 =head1 INCOMPATIBILITIES
179
180 A list of any modules that this module cannot be used in conjunction with.
181 This may be due to name conflicts in the interface, or competition for
182 system or program resources, or due to internal limitations of Perl
183 (for example, many modules that use source code filters are mutually
184 incompatible).
185
186
187 =head1 BUGS AND LIMITATIONS
188
189 A list of known problems with the module, together with some indication of
190 whether they are likely to be fixed in an upcoming release.
191
192 Also a list of restrictions on the features the module does provide:
193 data types that cannot be handled, performance issues and the circumstances
194 in which they may arise, practical limitations on the size of data sets,
195 special cases that are not (yet) handled, etc.
196
197 The initial template usually just has:
198
199 There are no known bugs in this module.
200 Please report problems to <Maintainer name(s)> (<contact address>)
201 Patches are welcome.
202
203 =head1 AUTHOR
204
205 <Author name(s)> (<contact address>)
206
207
208
209 =head1 LICENCE AND COPYRIGHT
210
211 Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
212
213 followed by whatever licence you wish to release it under.
214 For Perl code that is often just:
215
216 This module is free software; you can redistribute it and/or
217 modify it under the same terms as Perl itself. See L<perlartistic>.
218
219 This program is distributed in the hope that it will be useful,
220 but WITHOUT ANY WARRANTY; without even the implied warranty of
221 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
222