diff getUniqSV.pl @ 0:acc8d8bfeb9a

Uploaded
author jjohnson
date Wed, 08 Feb 2012 16:59:24 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/getUniqSV.pl	Wed Feb 08 16:59:24 2012 -0500
@@ -0,0 +1,222 @@
+#!/nfs_exports/apps/64-bit/gnu-apps/perl5.8.9/bin/perl -w
+
+use strict;
+use Carp;
+use Getopt::Long;
+use English;
+use Pod::Usage;
+use Data::Dumper;
+use DBI;
+use DBD::mysql;
+
+my $db = 'hg18';
+my $port = 3306;
+my $host = '10.4.19.125';
+my $USER = "sjXuser";
+
+my ( $help, $man, $version, $usage );
+my ($in, $normal, $out);
+my $optionOK = GetOptions(
+	'd|i|in|input=s'	=> \$in,
+	'g|normal=s'	    => \$normal,
+	'o|out|output=s'	=> \$out,
+	'h|help|?'		=> \$help,
+	'man'			=> \$man,
+	'usage'			=> \$usage,
+	'v|version'		=> \$version,
+);
+
+pod2usage(2) if($man);
+pod2usage( -verbose => 99, -sections => "USAGE|REQUIRED ARGUMENTS|OPTIONS" ) 
+	if($help or $usage);
+pod2usage( -verbose => 99, -sections => "VERSION") if($version);
+
+croak "Missing input file" if(!$in);
+open STDOUT, ">$out" if($out);
+open my $IN, "<$in" or croak "can't open $in: $OS_ERROR";
+open my $NORMAL, "<$normal" or croak "can't open $normal: $OS_ERROR" if($normal);
+
+my $dbh = DBI->connect("dbi:mysql:$db:$host:$port", $USER) or
+	die "Can't connect to MySQL database: $DBI::errstr\n";
+
+my @SV;
+my @normal_SV;
+if($normal) {
+	while(my $line = <$NORMAL>) {
+		chomp $line;
+		my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
+		next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type));
+		push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
+	}
+}
+while( my $line = <$IN> ) {
+	chomp $line;
+	my ($chrA, $posA, $ortA, $countA, $chrB, $posB, $ortB, $countB, $type) = split /\t/, $line;
+	next if( search_sv(\@normal_SV, $chrA, $posA, $chrB, $posB, $type)); # the SV exists in normal sample
+	push @normal_SV, [$chrA, $posA, $chrB, $posB, $type];
+	print STDOUT $line;
+	if($type eq 'DEL') {
+		my $tmp = get_gene_info($chrA, $posA, $posB);
+		print STDOUT "\t", $tmp if($tmp);
+	}
+	else {
+		print STDOUT "\t", get_gene_info($chrA, $posA, $posA);
+		print STDOUT "\t", get_gene_info($chrB, $posB, $posB);
+	}
+	print STDOUT "\n";
+}
+close $IN;
+close STDOUT if($out);
+$dbh->disconnect;
+exit(0);
+
+sub search_sv {
+	my ($r_SV, $chrA, $posA, $chrB, $posB, $type) = @_;
+
+	foreach my $sv (@{$r_SV}) {
+		return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 20 &&
+			$sv->[2] eq $chrB && abs($sv->[3] - $posB) < 20 && $sv->[4] eq $type);
+		return 1 if( $sv->[0] eq $chrB && abs($sv->[1] - $posB) < 20 &&
+			$sv->[2] eq $chrA && abs($sv->[3] - $posA) < 20 && $sv->[4] eq $type);
+		return 1 if( $sv->[0] eq $chrA && abs($sv->[1] - $posA) < 100 &&
+			$sv->[2] eq $chrB && abs($sv->[3] - $posB) < 100 && $sv->[4] eq $type && $type eq 'CTX');
+		return 1 if( $sv->[0] eq $chrB && abs($sv->[1]- $posB) < 100 &&
+			$sv->[2] eq $chrA && abs($sv->[3] - $posA)  < 100  && $sv->[4] eq $type && $type eq 'CTX');
+	}
+	return;
+}
+
+sub get_gene_info {
+   my ($chr, $st, $ed) = @_;
+   $chr = 'chr' . $chr if($chr !~ m/chr/);
+	my $rtn="";
+   my $sth = $dbh->prepare("select distinct geneName 
+                           from refFlat
+                           where (chrom = '$chr' AND $ed >= txStart AND $st <= txEnd)
+                           order by txStart"
+                          );
+   $sth->execute();
+   my $tbl_ref = $sth->fetchall_arrayref();
+   foreach my $r (@{$tbl_ref}) {
+   		if($rtn eq '') { 
+			$rtn = $r->[0];
+		} 
+		else {
+			$rtn = $rtn . ", " .  $r->[0];
+		}
+	}
+	return $rtn;
+}
+=head1 NAME
+
+<application name> - <One-line description of application's purpose>
+
+
+=head1 VERSION
+
+The initial template usually just has:
+
+This documentation refers to <application name> version 0.0.1.
+
+
+=head1 USAGE
+
+    # Brief working invocation example(s) here showing the most common usage(s)
+
+	# This section will be as far as many users ever read,
+	# so make it as educational and exemplary as possible.
+=head1 REQUIRED ARGUMENTS
+
+A complete list of every argument that must appear on the command line.
+when the application  is invoked, explaining what each of them does, any
+restrictions on where each one may appear (i.e., flags that must appear
+before or after filenames), and how the various arguments and options
+may interact (e.g., mutual exclusions, required combinations, etc.)
+
+If all of the application's arguments are optional, this section
+may be omitted entirely.
+
+=head1 OPTIONS
+
+A complete list of every available option with which the application
+can be invoked, explaining what each does, and listing any restrictions,
+or interactions.
+
+If the application has no options, this section may be omitted entirely.
+
+
+=head1 DESCRIPTION
+
+A full description of the application and its features.
+May include numerous subsections (i.e., =head2, =head3, etc.).
+
+
+=head1 DIAGNOSTICS
+
+A list of every error and warning message that the application can generate
+(even the ones that will "never happen"), with a full explanation of each
+problem, one or more likely causes, and any suggested remedies. If the
+application generates exit status codes (e.g., under Unix), then list the exit
+status associated with each error.
+
+=head1 CONFIGURATION AND ENVIRONMENT
+
+A full explanation of any configuration system(s) used by the application,
+including the names and locations of any configuration files, and the
+meaning of any environment variables or properties that can be set. These
+descriptions must also include details of any configuration language used.
+
+
+=head1 DEPENDENCIES
+
+A list of all the other modules that this module relies upon, including any
+restrictions on versions, and an indication of whether these required modules are
+part of the standard Perl distribution, part of the module's distribution,
+or must be installed separately.
+
+
+=head1 INCOMPATIBILITIES
+
+A list of any modules that this module cannot be used in conjunction with.
+This may be due to name conflicts in the interface, or competition for
+system or program resources, or due to internal limitations of Perl
+(for example, many modules that use source code filters are mutually
+incompatible).
+
+
+=head1 BUGS AND LIMITATIONS
+
+A list of known problems with the module, together with some indication of
+whether they are likely to be fixed in an upcoming release.
+
+Also a list of restrictions on the features the module does provide:
+data types that cannot be handled, performance issues and the circumstances
+in which they may arise, practical limitations on the size of data sets,
+special cases that are not (yet) handled, etc.
+
+The initial template usually just has:
+
+There are no known bugs in this module.
+Please report problems to <Maintainer name(s)>  (<contact address>)
+Patches are welcome.
+
+=head1 AUTHOR
+
+<Author name(s)> (<contact address>)
+
+
+
+=head1 LICENCE AND COPYRIGHT
+
+Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
+
+followed by whatever licence you wish to release it under.
+For Perl code that is often just:
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself. See L<perlartistic>.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+