Mercurial > repos > stheil > readpercontig_blat
comparison perl/scripts/readPerContig.pl @ 1:3203097d0a70 draft
Uploaded
author | stheil |
---|---|
date | Thu, 15 Oct 2015 09:53:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:9730db7c9ad3 | 1:3203097d0a70 |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use Data::Dumper; | |
4 use Logger::Logger; | |
5 use File::Basename; | |
6 use Getopt::Long; | |
7 use Cwd 'abs_path' ; | |
8 use Tools::Fasta ; | |
9 use Tools::Fastq ; | |
10 | |
11 | |
12 my $ref=''; | |
13 my @p1=''; | |
14 my @p2=''; | |
15 my @read=''; | |
16 my $identity = 95; | |
17 my $coverage = 60; | |
18 my $verbosity=1; | |
19 my $format; | |
20 my $stat; | |
21 my $blatOutput=''; | |
22 my $statOutput=''; | |
23 | |
24 GetOptions( | |
25 "r=s" => \$ref, | |
26 "id=i" => \$identity, | |
27 "cov=i" => \$coverage, | |
28 "b|blat" => \$blatOutput, | |
29 "o|output=s" => \$statOutput, | |
30 "f=s" => \$format, | |
31 "s=s" => \$stat, | |
32 "v|verbosity=i" => \$verbosity | |
33 ); | |
34 | |
35 Logger::Logger->changeMode($verbosity); | |
36 | |
37 &main; | |
38 | |
39 sub main { | |
40 my $self = {}; | |
41 bless $self; | |
42 $self->_setOptions(); | |
43 | |
44 $self->_checkFormat($format); | |
45 | |
46 $self->{ref_tool} = Tools::Fasta->new(file => $self->{reference}); | |
47 | |
48 $self->_parseBlat(); | |
49 | |
50 $self->_print($self->{_statistics}, $self->{statOutput}); | |
51 } | |
52 | |
53 | |
54 sub _parseBlat { | |
55 my ($self) = @_; | |
56 open(PSL,$self->{blatOutput}) || $logger->logdie('Cant read psl file ' . $self->{blatOutput}); | |
57 while(<PSL>){ | |
58 chomp; | |
59 my @line = split(/\t/,$_); | |
60 my $tSize = $line[14]; | |
61 my $tStart = $line[15]; | |
62 my $tEnd = $line[16]; | |
63 my $coverage = (($tEnd-$tStart)/$tSize) * 100; | |
64 | |
65 if($coverage >= $self->{_covThreshold}){ | |
66 $self->{readNumber}->{$line[9]}++; | |
67 $self->{totalReadNumber}++; | |
68 $self->{size}->{$line[9]} = $line[10]; | |
69 $self->{reads_size_sum}->{$line[9]} += $tSize; | |
70 } | |
71 else{ | |
72 next; | |
73 } | |
74 } | |
75 close PSL; | |
76 } | |
77 | |
78 | |
79 sub _print { | |
80 my ($self, $mode, $statOutput) = @_; | |
81 $logger->info('Printing results ...'); | |
82 | |
83 if(!defined $mode){ | |
84 $mode = 'nb_reads'; | |
85 } | |
86 | |
87 if(!defined $statOutput){ | |
88 $statOutput = $self->{reference} . '.rn'; | |
89 } | |
90 | |
91 open(OUT,">$statOutput"); | |
92 foreach my $ref (sort(keys(%{$self->{ref_tool}->{index}}))){ | |
93 my $value = 0; | |
94 if(defined($self->{readNumber}->{$ref})){ | |
95 if($mode eq 'nb_reads'){ | |
96 $value = $self->{readNumber}->{$ref}; | |
97 } | |
98 elsif($mode eq 'rpkm'){ | |
99 $value = $self->{readNumber}->{$ref} / ($self->{size}->{$ref}/1000 * $self->{totalReadNumber}/1000000); | |
100 } | |
101 elsif($mode eq 'mean_coverage'){ | |
102 $value = $self->{reads_size_sum}->{$ref} / $self->{size}->{$ref}; | |
103 } | |
104 } | |
105 print OUT $ref . "\t" . $value . "\n"; | |
106 } | |
107 close OUT; | |
108 } | |
109 | |
110 | |
111 sub _setOptions { | |
112 my ($self) = @_; | |
113 if(-e $ref){ | |
114 $self->{reference} = abs_path($ref); | |
115 } | |
116 else{ | |
117 $logger->error('You must provide a reference.'); | |
118 &help; | |
119 } | |
120 if($identity >= 0 && $identity <= 100){ | |
121 $self->{_idThreshold} = $identity; | |
122 } | |
123 else{ | |
124 $logger->logdie('ERROR: identity must be between 0 and 100.'); | |
125 } | |
126 if($coverage >=0 && $coverage <= 100){ | |
127 $self->{_covThreshold} = $coverage; | |
128 } | |
129 else{ | |
130 $logger->logdie('ERROR: coverage must be between 0 and 100.'); | |
131 } | |
132 if($stat =~ /nb_reads|rpkm|mean_coverage/){ | |
133 $self->{_statistics} = $stat; | |
134 } | |
135 else{ | |
136 $logdie(''); | |
137 } | |
138 if($blatOutput eq ''){ | |
139 $self->{blatOutput} = 'blat.psl'; | |
140 } | |
141 else{ | |
142 $self->{blatOutput} = $blatOutput; | |
143 } | |
144 if($statOutput eq ''){ | |
145 my($filename, $dirs, $suffix) = fileparse($ref, qr/\.[^.]*/); | |
146 $statOutput = $filename . '.psl'; | |
147 } | |
148 else{ | |
149 $self->{statOutput} = $statOutput; | |
150 } | |
151 } | |
152 | |
153 | |
154 sub help { | |
155 my $prog = basename($0) ; | |
156 print STDERR <<EOF ; | |
157 #### $prog #### | |
158 # | |
159 # AUTHOR: Sebastien THEIL | |
160 # LAST MODIF: 16/07/2015 | |
161 # PURPOSE: This script is basically used to map reads (using blat) on contigs (or scaffolds) and count the | |
162 number of read per contig. | |
163 Input can be multiple fasta or fastq. | |
164 Output is a 2 column tabular file. | |
165 | |
166 USAGE: | |
167 $prog -i singl.fastq -i singl.fasta -1 R1.fastq -2 R2.fastq .... | |
168 | |
169 ### OPTIONS ### | |
170 -r <string> Rererence sequences file. | |
171 -id <int> Identity threshold. (Default: $identity) | |
172 -cov <int> Coverage threshold. (Default: $coverage) | |
173 -b <string> Blat output file path | |
174 -o <string> Output file path | |
175 -s <string> Statistics to print : nb_reads, rpkm or mean_coverage. | |
176 -v <int> Verbosity level. (0 -> 4). | |
177 EOF | |
178 exit(1); | |
179 } |