annotate perl/lib/Fastq.pm @ 2:ea81b455dbf6 draft default tip

Uploaded
author stheil
date Thu, 15 Oct 2015 10:12:03 -0400
parents 3203097d0a70
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3203097d0a70 Uploaded
stheil
parents:
diff changeset
1 package Tools::Fastq;
3203097d0a70 Uploaded
stheil
parents:
diff changeset
2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
3 use strict;
3203097d0a70 Uploaded
stheil
parents:
diff changeset
4 use warnings;
3203097d0a70 Uploaded
stheil
parents:
diff changeset
5 use Logger::Logger;
3203097d0a70 Uploaded
stheil
parents:
diff changeset
6 use Storable;
3203097d0a70 Uploaded
stheil
parents:
diff changeset
7
3203097d0a70 Uploaded
stheil
parents:
diff changeset
8
3203097d0a70 Uploaded
stheil
parents:
diff changeset
9 =head1 INDEXED FASTQ RELATED METHODS
3203097d0a70 Uploaded
stheil
parents:
diff changeset
10
3203097d0a70 Uploaded
stheil
parents:
diff changeset
11 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
12
3203097d0a70 Uploaded
stheil
parents:
diff changeset
13 =head2 new
3203097d0a70 Uploaded
stheil
parents:
diff changeset
14
3203097d0a70 Uploaded
stheil
parents:
diff changeset
15 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
16
3203097d0a70 Uploaded
stheil
parents:
diff changeset
17 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
18
3203097d0a70 Uploaded
stheil
parents:
diff changeset
19 Create a new Tools::Fastq object and index the FASTQ file
3203097d0a70 Uploaded
stheil
parents:
diff changeset
20
3203097d0a70 Uploaded
stheil
parents:
diff changeset
21 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
22
3203097d0a70 Uploaded
stheil
parents:
diff changeset
23 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
24
3203097d0a70 Uploaded
stheil
parents:
diff changeset
25 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
26
3203097d0a70 Uploaded
stheil
parents:
diff changeset
27 A hash of parameters.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
28
3203097d0a70 Uploaded
stheil
parents:
diff changeset
29 Currently accepted keys are :
3203097d0a70 Uploaded
stheil
parents:
diff changeset
30
3203097d0a70 Uploaded
stheil
parents:
diff changeset
31 'file' => FASTQ file path
3203097d0a70 Uploaded
stheil
parents:
diff changeset
32
3203097d0a70 Uploaded
stheil
parents:
diff changeset
33 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
34
3203097d0a70 Uploaded
stheil
parents:
diff changeset
35 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
36
3203097d0a70 Uploaded
stheil
parents:
diff changeset
37 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
38
3203097d0a70 Uploaded
stheil
parents:
diff changeset
39 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
40
3203097d0a70 Uploaded
stheil
parents:
diff changeset
41 A Tools::Fastq object
3203097d0a70 Uploaded
stheil
parents:
diff changeset
42
3203097d0a70 Uploaded
stheil
parents:
diff changeset
43 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
44
3203097d0a70 Uploaded
stheil
parents:
diff changeset
45 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
46
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Constructor: create a Tools::Fastq object and, when a 'file' attribute is
# supplied, open that FASTQ file for reading and index it immediately.
#
# Arguments : a hash of attributes; only the 'file' key (FASTQ file path) is
#             read here.
# Returns   : the blessed object. Without a 'file' attribute the object has
#             neither a file handle nor an index.
# Errors    : an open failure is reported through $logger->logdie
#             (presumably fatal - confirm against Logger::Logger).
sub new {
    my ($class, %attrs) = @_;

    # Two-argument bless so that subclasses (or a call on an existing
    # instance) get an object of the right class instead of always
    # Tools::Fastq.
    my $self = bless {}, ref($class) || $class;

    if (defined $attrs{file}) {
        $self->{file} = $attrs{file};

        # Three-argument open: the path is always treated as a plain file
        # name opened for reading, whatever characters it contains.
        open($self->{file_handle}, '<', $self->{file})
            || $logger->logdie('Error opening file : ' . $self->{file} . ' : ' . $! . "\n");

        $self->indexFastqFile;
    }

    return $self;
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
58
3203097d0a70 Uploaded
stheil
parents:
diff changeset
59 =head2 indexFastqFile
3203097d0a70 Uploaded
stheil
parents:
diff changeset
60
3203097d0a70 Uploaded
stheil
parents:
diff changeset
61 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
62
3203097d0a70 Uploaded
stheil
parents:
diff changeset
63 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
64
3203097d0a70 Uploaded
stheil
parents:
diff changeset
65 Index a FASTQ file creating a hash reference with the following structure :
3203097d0a70 Uploaded
stheil
parents:
diff changeset
66
3203097d0a70 Uploaded
stheil
parents:
diff changeset
67 $index -> {seq_id} = {'id_begin_position' => integer, 'id_length' => integer}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
68
3203097d0a70 Uploaded
stheil
parents:
diff changeset
69 For each sequence id, the "@" symbol and all the text after space will be removed.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
70
3203097d0a70 Uploaded
stheil
parents:
diff changeset
71 This cleaned id will be used as key for the index.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
72
3203097d0a70 Uploaded
stheil
parents:
diff changeset
73 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
74
3203097d0a70 Uploaded
stheil
parents:
diff changeset
75 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
76
3203097d0a70 Uploaded
stheil
parents:
diff changeset
77 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
78
3203097d0a70 Uploaded
stheil
parents:
diff changeset
79 None
3203097d0a70 Uploaded
stheil
parents:
diff changeset
80
3203097d0a70 Uploaded
stheil
parents:
diff changeset
81 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
82
3203097d0a70 Uploaded
stheil
parents:
diff changeset
83 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
84
3203097d0a70 Uploaded
stheil
parents:
diff changeset
85 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
86
3203097d0a70 Uploaded
stheil
parents:
diff changeset
87 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
88
3203097d0a70 Uploaded
stheil
parents:
diff changeset
89 None
3203097d0a70 Uploaded
stheil
parents:
diff changeset
90
3203097d0a70 Uploaded
stheil
parents:
diff changeset
91 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
92
3203097d0a70 Uploaded
stheil
parents:
diff changeset
93 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
94
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Build the in-memory index of the FASTQ file attached to this object.
#
# The index maps each cleaned sequence id (header line without the leading
# "@" and without anything after the first whitespace) to
#   { id_begin_position => byte offset of the header line,
#     id_length         => length of the header line, newline included }.
#
# Arguments : none (reads $self->{file_handle} and $self->{file}).
# Returns   : the index hash reference, which is also stored in
#             $self->{index}.
# Assumes records are exactly four lines long (header, sequence, "+",
# quality): the three lines following a header are skipped unread.
sub indexFastqFile{
    my ($self) = @_;
    $logger->info('Indexing file : '.$self->{file}."\n");

    my $index = {};
    my $fh    = $self->{file_handle};

    # The first record is recorded at offset 0, so the handle has to be at
    # the start of the file; rewinding makes re-indexing the same object safe.
    seek($fh, 0, 0) if defined $fh;

    my $id_begin_position = 0;
    while (my $line = <$fh>) {

        if ($line =~ /^@(\S+)/) {
            # \S+ can never capture the line terminator, so no chomp needed.
            my $id = $1;

            $index->{$id} = {
                'id_begin_position' => $id_begin_position,
                'id_length'         => length $line,
            };
            $logger->trace('Indexing sequence ' . $id . ' (position_begin_id : '. $index->{$id}{'id_begin_position'} . ', id_length : '. $index->{$id}{'id_length'} .') from ' . $self->{file} . "\n");

            # Skip the sequence, separator and quality lines so that a
            # quality string starting with "@" is not taken for a header.
            <$fh>; <$fh>; <$fh>;
        }

        # Offset of whatever line comes next (the next header in a
        # well-formed file).
        $id_begin_position = tell($fh);
    }

    $logger->info('File '.$self->{file}.' is now indexed (index contains '.(scalar keys %$index)." sequences)\n");
    $self->{index} = $index;
    return $index;
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
120
3203097d0a70 Uploaded
stheil
parents:
diff changeset
121 =head2 loadFastqIndexFile
3203097d0a70 Uploaded
stheil
parents:
diff changeset
122
3203097d0a70 Uploaded
stheil
parents:
diff changeset
123 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
124
3203097d0a70 Uploaded
stheil
parents:
diff changeset
125 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
126
3203097d0a70 Uploaded
stheil
parents:
diff changeset
127 Retrieve index from file using Storable module
3203097d0a70 Uploaded
stheil
parents:
diff changeset
128
3203097d0a70 Uploaded
stheil
parents:
diff changeset
129 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
130
3203097d0a70 Uploaded
stheil
parents:
diff changeset
131 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
132
3203097d0a70 Uploaded
stheil
parents:
diff changeset
133 An index file
3203097d0a70 Uploaded
stheil
parents:
diff changeset
134
3203097d0a70 Uploaded
stheil
parents:
diff changeset
135 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
136
3203097d0a70 Uploaded
stheil
parents:
diff changeset
137 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
138
3203097d0a70 Uploaded
stheil
parents:
diff changeset
139 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
140
3203097d0a70 Uploaded
stheil
parents:
diff changeset
141 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
142
3203097d0a70 Uploaded
stheil
parents:
diff changeset
143 A hash reference corresponding to the index of the input FASTQ file :
3203097d0a70 Uploaded
stheil
parents:
diff changeset
144
3203097d0a70 Uploaded
stheil
parents:
diff changeset
145 $index -> {seq_id} = {'id_begin_position' => integer, 'id_length' => integer}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
146
3203097d0a70 Uploaded
stheil
parents:
diff changeset
147 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
148
3203097d0a70 Uploaded
stheil
parents:
diff changeset
149 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
150
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Replace this object's index with one previously saved with Storable.
#
# Arguments : path of the stored index file.
# Returns   : whatever the final $logger->info call returns (the sub has no
#             explicit return value).
sub loadFastqIndexFile{
    my $self       = shift;
    my $index_path = shift;

    # NOTE(review): Storable's documentation advises against retrieving
    # data from untrusted sources - confirm index files are always produced
    # locally.
    $self->{index} = retrieve($index_path);

    $logger->info("File $index_path is now loaded\n");
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
157
3203097d0a70 Uploaded
stheil
parents:
diff changeset
158 =head2 writeFastqIndexFile
3203097d0a70 Uploaded
stheil
parents:
diff changeset
159
3203097d0a70 Uploaded
stheil
parents:
diff changeset
160 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
161
3203097d0a70 Uploaded
stheil
parents:
diff changeset
162 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
163
3203097d0a70 Uploaded
stheil
parents:
diff changeset
164 Write index to file using Storable module
3203097d0a70 Uploaded
stheil
parents:
diff changeset
165
3203097d0a70 Uploaded
stheil
parents:
diff changeset
166 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
167
3203097d0a70 Uploaded
stheil
parents:
diff changeset
168 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
169
3203097d0a70 Uploaded
stheil
parents:
diff changeset
170 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
171
3203097d0a70 Uploaded
stheil
parents:
diff changeset
172 A hash reference corresponding to FASTQ index.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
173
3203097d0a70 Uploaded
stheil
parents:
diff changeset
174 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
175
3203097d0a70 Uploaded
stheil
parents:
diff changeset
176 An output file path where to store the index.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
177
3203097d0a70 Uploaded
stheil
parents:
diff changeset
178 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
179
3203097d0a70 Uploaded
stheil
parents:
diff changeset
180 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
181
3203097d0a70 Uploaded
stheil
parents:
diff changeset
182 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
183
3203097d0a70 Uploaded
stheil
parents:
diff changeset
184 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
185
3203097d0a70 Uploaded
stheil
parents:
diff changeset
186 The output file path containing index
3203097d0a70 Uploaded
stheil
parents:
diff changeset
187
3203097d0a70 Uploaded
stheil
parents:
diff changeset
188 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
189
3203097d0a70 Uploaded
stheil
parents:
diff changeset
190 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
191
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Save this object's index to disk with Storable.
#
# Arguments : path of the file to write the index to.
# Returns   : whatever the final $logger->info call returns (the sub has no
#             explicit return value). The result of store() is not checked.
sub writeFastqIndexFile{
    my $self     = shift;
    my $out_path = shift;

    # Number of indexed sequences, for the log message only.
    my $nb_sequences = keys %{ $self->{index} };
    $logger->info("Writing index ($nb_sequences sequences) in file : $out_path\n");

    store($self->{index}, $out_path);

    $logger->info("File $out_path is now created\n");
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
198
3203097d0a70 Uploaded
stheil
parents:
diff changeset
199 =head2 retrieveFastqSequence
3203097d0a70 Uploaded
stheil
parents:
diff changeset
200
3203097d0a70 Uploaded
stheil
parents:
diff changeset
201 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
202
3203097d0a70 Uploaded
stheil
parents:
diff changeset
203 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
204
3203097d0a70 Uploaded
stheil
parents:
diff changeset
205 Retrieve FASTQ sequences using a list of ids
3203097d0a70 Uploaded
stheil
parents:
diff changeset
206
3203097d0a70 Uploaded
stheil
parents:
diff changeset
207 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
208
3203097d0a70 Uploaded
stheil
parents:
diff changeset
209 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
210
3203097d0a70 Uploaded
stheil
parents:
diff changeset
211 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
212
3203097d0a70 Uploaded
stheil
parents:
diff changeset
213 A sequence id OR an array reference containing the list of sequences id to retrieve.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
214
3203097d0a70 Uploaded
stheil
parents:
diff changeset
215 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
216
3203097d0a70 Uploaded
stheil
parents:
diff changeset
217 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
218
3203097d0a70 Uploaded
stheil
parents:
diff changeset
219 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
220
3203097d0a70 Uploaded
stheil
parents:
diff changeset
221 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
222
3203097d0a70 Uploaded
stheil
parents:
diff changeset
223 A hash reference containing sequences id as keys and sequences as values
3203097d0a70 Uploaded
stheil
parents:
diff changeset
224
3203097d0a70 Uploaded
stheil
parents:
diff changeset
225 $data -> {seq_id} = sequence_corresponding_to_seq_id
3203097d0a70 Uploaded
stheil
parents:
diff changeset
226
3203097d0a70 Uploaded
stheil
parents:
diff changeset
227 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
228
3203097d0a70 Uploaded
stheil
parents:
diff changeset
229 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
230
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Fetch the sequence line of one or several reads through the index.
#
# Arguments : a single sequence id, or an array reference of ids. An id may
#             carry a leading "@" and/or a trailing description; both are
#             ignored for the index lookup.
# Returns   : a hash reference { id as given by the caller => sequence line }.
#             The sequence is returned exactly as read from the file (line
#             terminator included). Ids missing from the index are left out.
sub retrieveFastqSequence{
    my ($self, $ids) = @_;
    my $data = {};
    my $nbSequences = 0;

    # Accept a bare id as well as a list of ids.
    if (! ref $ids) { $ids = [$ids] }

    $logger->debug('Retrieving sequences of '.scalar(@$ids).' ids from indexed file : '.$self->{file}."\n");
    my $fh = $self->{file_handle};

    foreach my $id (@$ids) {
        # Clean the id the same way the indexer does: drop an optional
        # leading "@" and everything after the first whitespace. Anchoring
        # keeps an "@" inside the id (e.g. "read@1") from truncating it.
        my $cleanedId = $id;
        if ($id =~ /^\s*@?(\S+)/) { $cleanedId = $1 }

        $logger->trace('Retrieving informations of id ' . $cleanedId. " from index\n");
        if (exists $self->{index}->{$cleanedId}) {
            my $entry = $self->{index}->{$cleanedId};
            $logger->trace('id ' . $cleanedId . ' is present in index (id_begin_position : '. $entry->{'id_begin_position'}. ', id_length : '. $entry->{'id_length'}.")\n");

            # Jump to the header line, skip it, then read the sequence line.
            seek($fh, $entry->{'id_begin_position'}, 0);
            <$fh>;
            my $sequence = <$fh>;

            # Results are keyed by the id as supplied, not the cleaned one.
            $data->{$id} = $sequence;
            $nbSequences++;
            $logger->trace('Sequence of id '.$cleanedId.' is : ' . $sequence . "\n");
        }
        else {
            $logger->trace('id ' . $cleanedId. " not found in index\n");
        }
    }

    $logger->debug($nbSequences.'/'.scalar(@$ids).' sequences has been retrieved from indexed file ' . $self->{file} . "\n");
    return $data;
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
258
3203097d0a70 Uploaded
stheil
parents:
diff changeset
259 =head2 retrieveFastqQuality
3203097d0a70 Uploaded
stheil
parents:
diff changeset
260
3203097d0a70 Uploaded
stheil
parents:
diff changeset
261 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
262
3203097d0a70 Uploaded
stheil
parents:
diff changeset
263 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
264
3203097d0a70 Uploaded
stheil
parents:
diff changeset
265 Retrieve FASTQ sequences quality using a list of ids
3203097d0a70 Uploaded
stheil
parents:
diff changeset
266
3203097d0a70 Uploaded
stheil
parents:
diff changeset
267 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
268
3203097d0a70 Uploaded
stheil
parents:
diff changeset
269 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
270
3203097d0a70 Uploaded
stheil
parents:
diff changeset
271 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
272
3203097d0a70 Uploaded
stheil
parents:
diff changeset
273 A sequence id OR an array reference containing the list of sequences id to retrieve quality.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
274
3203097d0a70 Uploaded
stheil
parents:
diff changeset
275 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
276
3203097d0a70 Uploaded
stheil
parents:
diff changeset
277 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
278
3203097d0a70 Uploaded
stheil
parents:
diff changeset
279 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
280
3203097d0a70 Uploaded
stheil
parents:
diff changeset
281 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
282
3203097d0a70 Uploaded
stheil
parents:
diff changeset
283 A scalar containing the quality lines of the requested ids, concatenated in the order the ids were given (ids absent from the index contribute nothing)
3203097d0a70 Uploaded
stheil
parents:
diff changeset
284
3203097d0a70 Uploaded
stheil
parents:
diff changeset
285 $data = quality_line_of_first_id . quality_line_of_second_id . ...
3203097d0a70 Uploaded
stheil
parents:
diff changeset
286
3203097d0a70 Uploaded
stheil
parents:
diff changeset
287 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
288
3203097d0a70 Uploaded
stheil
parents:
diff changeset
289 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
290
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Fetch the quality line of one or several reads through the index.
#
# Arguments : a single sequence id, or an array reference of ids. An id may
#             carry a leading "@" and/or a trailing description; both are
#             ignored for the index lookup.
# Returns   : a scalar holding the quality lines of the ids found,
#             concatenated in request order (each as read from the file,
#             line terminator included); undef when no id was found.
# NOTE(review): unlike retrieveFastqSequence this returns one concatenated
# string rather than a hash reference keyed by id; the existing return shape
# is kept for callers - confirm which contract is intended.
sub retrieveFastqQuality{
    my ($self, $ids) = @_;
    my $data;
    my $nbSequences = 0;

    # Accept a bare id as well as a list of ids.
    if (! ref $ids) { $ids = [$ids] }

    $logger->debug('Retrieving sequence quality of '.scalar(@$ids).' ids from indexed file : '.$self->{file}."\n");
    my $fh = $self->{file_handle};

    foreach my $id (@$ids) {
        # Clean the id the same way the indexer does: drop an optional
        # leading "@" and everything after the first whitespace. Anchoring
        # keeps an "@" inside the id (e.g. "read@1") from truncating it.
        my $cleanedId = $id;
        if ($id =~ /^\s*@?(\S+)/) {
            $cleanedId = $1;
        }

        $logger->trace('retrieving informations of id ' . $cleanedId. " from index\n");
        if (exists $self->{index}->{$cleanedId}) {
            my $entry = $self->{index}->{$cleanedId};
            $logger->trace('id ' . $cleanedId . ' is present in index (id_begin_position : '. $entry->{'id_begin_position'}. ', id_length : '. $entry->{'id_length'}.")\n");

            seek($fh, $entry->{'id_begin_position'}, 0);

            # Skip the header, sequence and separator lines; the fourth
            # line of the record is the quality string.
            <$fh>; <$fh>; <$fh>;
            my $quality = <$fh>;

            $data .= $quality;
            $nbSequences++;
            $logger->trace('Sequence quality of id '.$cleanedId.' is : ' . $quality. "\n");
        }
        else {
            $logger->trace('id ' . $cleanedId. " not found in index\n");
        }
    }

    $logger->debug($nbSequences.'/'.scalar(@$ids).' sequences qualities has been retrieved from indexed file ' . $self->{file} . "\n");
    return $data;
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
320
3203097d0a70 Uploaded
stheil
parents:
diff changeset
321 =head2 retrieveFastqBlock
3203097d0a70 Uploaded
stheil
parents:
diff changeset
322
3203097d0a70 Uploaded
stheil
parents:
diff changeset
323 =head2
3203097d0a70 Uploaded
stheil
parents:
diff changeset
324
3203097d0a70 Uploaded
stheil
parents:
diff changeset
325 =head3 Description
3203097d0a70 Uploaded
stheil
parents:
diff changeset
326
3203097d0a70 Uploaded
stheil
parents:
diff changeset
327 Retrieve FASTQ formatted sequences using a list of ids
3203097d0a70 Uploaded
stheil
parents:
diff changeset
328
3203097d0a70 Uploaded
stheil
parents:
diff changeset
329 =head3 Arguments
3203097d0a70 Uploaded
stheil
parents:
diff changeset
330
3203097d0a70 Uploaded
stheil
parents:
diff changeset
331 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
332
3203097d0a70 Uploaded
stheil
parents:
diff changeset
333 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
334
3203097d0a70 Uploaded
stheil
parents:
diff changeset
335 A sequence id OR an array reference containing the list of sequences id to retrieve.
3203097d0a70 Uploaded
stheil
parents:
diff changeset
336
3203097d0a70 Uploaded
stheil
parents:
diff changeset
337 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
338
3203097d0a70 Uploaded
stheil
parents:
diff changeset
339 =head3 Returns
3203097d0a70 Uploaded
stheil
parents:
diff changeset
340
3203097d0a70 Uploaded
stheil
parents:
diff changeset
341 =over 4
3203097d0a70 Uploaded
stheil
parents:
diff changeset
342
3203097d0a70 Uploaded
stheil
parents:
diff changeset
343 =item
3203097d0a70 Uploaded
stheil
parents:
diff changeset
344
3203097d0a70 Uploaded
stheil
parents:
diff changeset
345 A scalar containing the sequences corresponding to ids in FASTQ format
3203097d0a70 Uploaded
stheil
parents:
diff changeset
346
3203097d0a70 Uploaded
stheil
parents:
diff changeset
347 =back
3203097d0a70 Uploaded
stheil
parents:
diff changeset
348
3203097d0a70 Uploaded
stheil
parents:
diff changeset
349 =cut
3203097d0a70 Uploaded
stheil
parents:
diff changeset
350
3203097d0a70 Uploaded
stheil
parents:
diff changeset
# Fetch complete FASTQ records (four lines each) of one or several reads
# through the index.
#
# Arguments : a single sequence id, or an array reference of ids. An id may
#             carry a leading "@" and/or a trailing description; both are
#             ignored for the index lookup.
# Returns   : a scalar holding the records of the ids found, concatenated in
#             request order; undef when no id was found.
sub retrieveFastqBlock{
    my ($self, $ids) = @_;
    my $data;
    my $nbSequences = 0;

    # Accept a bare id as well as a list of ids.
    if (! ref $ids) { $ids = [$ids] }

    $logger->trace('Retrieving fastq block of '.scalar(@$ids).' ids from indexed file : '.$self->{file}."\n");
    my $fh = $self->{file_handle};

    foreach my $id (@$ids) {
        # Clean the id the same way the indexer does: drop an optional
        # leading "@" and everything after the first whitespace. Anchoring
        # keeps an "@" inside the id (e.g. "read@1") from truncating it.
        my $cleanedId = $id;
        if ($id =~ /^\s*@?(\S+)/) {
            $cleanedId = $1;
        }

        $logger->trace('Retrieving informations of id ' . $cleanedId. " from index\n");
        if (exists $self->{index}->{$cleanedId}) {
            my $entry = $self->{index}->{$cleanedId};
            $logger->trace('id ' . $cleanedId . ' is present in index (id_begin_position : '. $entry->{'id_begin_position'}. ', id_length : '. $entry->{'id_length'}.")\n");

            seek($fh, $entry->{'id_begin_position'}, 0);

            # The header line is read by its recorded length, then the
            # sequence, separator and quality lines are appended.
            read($fh, my $block, $entry->{'id_length'});
            $block .= <$fh>.<$fh>.<$fh>;

            $data .= $block;
            $nbSequences++;
            $logger->trace('fastq block of id '.$cleanedId.' is : ' ."\n". $block. "\n");
        }
        else {
            $logger->trace('id ' . $cleanedId. " not found in index\n");
        }
    }

    $logger->trace($nbSequences.'/'.scalar(@$ids).' fastq block has been retrieved from indexed file ' . $self->{file} . "\n");
    return $data;
}
3203097d0a70 Uploaded
stheil
parents:
diff changeset
382 1;