annotate 2.4/library/LevD.pm @ 16:8eb7d93f7e58 draft

Uploaded
author plus91-technologies-pvt-ltd
date Sat, 31 May 2014 11:23:36 -0400
parents e3609c8714fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 package LevD;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use lib "/data2/bsi/reference/softsearch/lib/perl5";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 use strict;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 use warnings;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 use Data::Dumper;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7 use String::Approx 'adist';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 use String::Approx 'adistr';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9 use String::Approx 'aindex';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11 my $WINDOW_SIZE = 100;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
13 sub new {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
14 my ($class, $file) = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
15 my $self = {};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
16
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
17 bless($self,$class);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
18 $self->init();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
19
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
20 return $self;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
21 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
23 sub init {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
24 my ($self) = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
25
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26 #### default values.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
27 $self->{index} = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
28 $self->{relative_edit_dist} = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
29 $self->{edit_dist} = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
30 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
32 sub search {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
33 my ($self, $clip, $chr, $start, $stop, $ref) = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
35 if (! -s $ref) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
36 die "ERROR: Reference file $ref now found\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
37 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
38
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
39 #### extact seq from reference file.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
40 my $target = $chr .":". $start ."-". $stop;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
41 my $cmd = "samtools faidx $ref $target";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
42
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
43 my @output = $self->_run_system_cmd($cmd);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
45 #### depending on ref file format seq could be on multiple lines
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
46 #### concatinate all except for the header in one line.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
47 #### e.g:
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
48 #### >chr1:8222999-8223099
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
49 #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
50 #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
51 my $seq = join("", @output[1..$#output]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
52
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
53 #### remove new line character
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
54 $seq =~ s/\n//g;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
55
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
56 #### find number of mismatches and start index
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
57 #### of clip to be searched against target seq.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58 $self->{relative_edit_dist} = adistr($clip, $seq);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
59 $self->{edit_dist} = adist($clip, $seq);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
60 $self->{index} = aindex($clip, $seq);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
63 sub _run_system_cmd {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
64 my ($self, $cmd) = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
65 my @cmd_output;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
66
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
67 eval {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
68 @cmd_output = qx{$cmd 2>&1};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
69 if ( ($? << 8) != 0 ) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
70 die "@cmd_output";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
71 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
72 };
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
73 if ($@) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
74 die "Error executing command $cmd: $@";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
75 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
76
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
77 return @cmd_output;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
78 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
79
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
80 1;