Mercurial > repos > plus > archer
comparison archer.pl @ 3:3af9b7634b2d draft default tip
Uploaded
author | plus |
---|---|
date | Thu, 29 May 2014 02:32:55 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:d5aed166429d | 3:3af9b7634b2d |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use warnings; | |
5 | |
# File-level scratch variables. $i is the sample index of the two loops below;
# define_tags() reads this variable directly, so the loops must keep using it.
# NOTE(review): $j and $k are not used in this part of the file - confirm
# whether later code needs them before removing.
my($i, $j, $k);
my $parameters = {};

# Print the command-line synopsis to STDOUT and terminate with exit status 1.
sub usage {
    print "\nUsage: $0 \n\n\t "
        . "REQUIRED \n\t "
        . "-config <config_file> \n\n";
    exit(1);
}
if (scalar(@ARGV) == 0) {
    usage();
}

# Parse the Command Line
parse_command_line($parameters, @ARGV);

# Without "-config <file>" there is neither a config file to read nor a name
# to derive the log file from, so show the synopsis instead of carrying on.
usage() unless defined $parameters->{config_file};

# Log File
# The bareword handle is kept because it is handed to the subs as \*LOG_FILE.
my $log_file = $parameters->{config_file} . ".log";
unless ( open(LOG_FILE, '>', $log_file) ) {
    print "Cannot open file \"$log_file\" to write to!!\n\n";
    exit(1);    # failure must not look like success to the caller
}
print LOG_FILE "config = $parameters->{config_file}\n";

# Time Stamp
my $timestamp = localtime(time);
print LOG_FILE $timestamp, "\n";

# Parse Config File
my @samples = ();
my $number_of_samples = parse_config_file(\@samples, \$parameters);
print LOG_FILE "directory = $parameters->{directory}\n\n";

my $files = {};
# Create Target Regions File
$files->{target_regions_file} = $parameters->{directory} . "/target_regions.dat";
create_target_regions_file(\$parameters, \$files, \*LOG_FILE);

my @tags = ();
my($reads, $tag);
my($fastq_file_1, $fastq_file_2);
my %proceed = ();
my $alignment_string = '';
my $number_of_alignment_files = 0;
my $cmd = '';

# Write a shell command block to the log, then execute it through the shell.
my $log_and_run = sub {
    my ($command) = @_;
    print LOG_FILE $command;
    system($command);
};

# First pass: decide per sample whether its FASTQ input is usable and collect
# every usable FASTQ file into one alignment command line.
for ($i = 0; $i < $number_of_samples; $i++) {

    # Define Tags
    define_tags(\@samples, \@tags, \$reads, \$tag, \$fastq_file_1, \$fastq_file_2, \$parameters, \*LOG_FILE);

    # Check if FASTQ Files Exist and are Non-empty
    $proceed{$i} = decide_to_proceed(\$reads, \$fastq_file_1, \$fastq_file_2);

    # Create String of FASTQ Files to be Aligned by bwa_enz
    create_alignment_string($proceed{$i}, \$reads, \$number_of_alignment_files, \$alignment_string, \$fastq_file_1, \$fastq_file_2);
}

# Align the reads with bwa_enz
# Would be better to align paired reads together??
$cmd = align_reads(\$parameters, $number_of_alignment_files, $alignment_string);
$log_and_run->($cmd);

# Second pass: run the per-sample pipeline. Each step first fills in the file
# names it needs in $files, then builds a shell command block that is logged
# and executed.
for ($i = 0; $i < $number_of_samples; $i++) {

    # Define Tags
    define_tags(\@samples, \@tags, \$reads, \$tag, \$fastq_file_1, \$fastq_file_2, \$parameters, \*LOG_FILE);

    # Define Summary File Names
    define_summary_file_names(\$tag, \$parameters, \$files);

    if ( $proceed{$i} == 1 ) {

        # Convert SAM -> BAM -> BED
        define_alignments_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd  = rename_sam_files(\$reads, \$files);
        $cmd .= alignments(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);

        # De-duplicate the SAM File(s)
        define_de_duplication_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = de_duplication(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);
        # What if only reverse read exists? Don't want to de-duplicate?

        # Select On-/Off-Target Reads
        # Split marked files into two files
        define_on_and_off_target_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = select_on_and_off_target_reads(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Coverage and Start Sites
        define_coverage_and_start_site_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = generate_coverage_and_start_sites(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Proceed through the rest of the pipeline using the on-target reads

        # Create Master Files - one line per read
        define_master_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = generate_master_files(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Select Fusion Reads - do not count mapping to a 'novel', i.e., not in refseq, region as a fusion
        define_fusion_reads_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = select_fusion_reads(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Count Fusions
        define_count_fusions_file_names(\$tag, \$parameters, \$files);
        $cmd = count_fusions(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Flanking Sequences
        define_flanking_sequences_file_names(\$tag, \$parameters, \$files);
        $cmd = flanking_sequences(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);

        # BAM Dedup Files
        define_bam_dedup_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = bam_dedup_files(\$reads, \$files);
        $log_and_run->($cmd);

        # Consensus Sequences - Fusion and Splice
        #define_consensus_sequences_file_names(\$tag, \$parameters, \$files);
        #$cmd = consensus_sequences(\$reads, \$fastq_file_1, \$fastq_file_2, \$tag, \$parameters, \$files);
        #$log_and_run->($cmd);

        # Sort SAM Files
        define_sort_sam_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = sort_sam_files(\$reads, \$files);
        $log_and_run->($cmd);

        # On-target Stats
        define_on_target_stats_file_names(\$tag, \$parameters, \$files);
        $cmd = on_target_stats(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Total Molecule Counts
        define_total_molecule_counts_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = total_molecule_counts(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # De-duplicated Molecule Counts
        define_de_deduplicated_molecule_counts_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = de_duplicated_molecule_counts(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # All Molecule Counts
        define_all_molecule_counts_file_names(\$tag, \$parameters, \$files);
        $cmd = all_molecule_counts(\$parameters, \$files);
        $log_and_run->($cmd);

        # QC Check
        define_qc_check_file_names(\$tag, \$parameters, \$files);
        $cmd = qc_check(\$parameters, \$files);
        $log_and_run->($cmd);

        # Coverage Uniformity
        define_coverage_uniformity_file_names(\$tag, \$parameters, \$files);
        $cmd = coverage_uniformity(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Summary
        $cmd = summary(\$tag, \$parameters, \$files);
        $log_and_run->($cmd);

        # Clean Up
        $cmd = clean_up(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);
    }
    else {
        # Summary for Unprocessed Sample
        $cmd = summary_for_unprocessed_sample(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);
    }

    # Time Stamp
    $timestamp = localtime(time);
    print LOG_FILE $timestamp, "\n";
}

# Join Multiple Samples
#$cmd = join_multiple_samples(\$parameters);
#$log_and_run->($cmd);

close(LOG_FILE);

exit;
217 | |
# Scan a list of command-line words for "-config <file>" and record the file.
#
# Arguments:
#   $parameters - hash reference; the key 'config_file' is set when "-config"
#                 is found
#   @arguments  - the command-line words to scan (the caller passes @ARGV)
#
# Words other than "-config" are skipped. When "-config" is the final word
# there is nothing left to shift, so 'config_file' ends up undefined.
# The argument list is copied into a private array; the global @ARGV is
# neither shadowed nor modified.
sub parse_command_line {
    my ($parameters, @arguments) = @_;

    while (@arguments) {
        my $next_arg = shift @arguments;
        if ($next_arg eq '-config') {
            $parameters->{config_file} = shift @arguments;
        }
    }
    return;
}
226 | |
227 | |
# Read the "key=value" configuration file named by 'config_file'.
#
# Arguments:
#   $samples_ref    - array reference; every "sample=..." value is stored
#                     here, starting at index 0
#   $parameters_ref - reference to the parameters hash reference; supplies
#                     'config_file' and receives every other key/value pair
#
# Returns the number of elements in the samples array and prints that number
# to STDOUT. Dies when the config file cannot be opened.
#
# Lines of at most one character and lines starting with '#' are ignored.
# Each remaining line is split at the FIRST '=' only, so a value may itself
# contain '=' characters.
sub parse_config_file {
    my ($samples_ref, $parameters_ref) = @_;

    my $config_file = $$parameters_ref->{config_file};
    my $count       = 0;

    open( my $config_fh, '<', $config_file )
        or die "Can't open $config_file : $!";

    # A named lexical is used so the caller's $_ is left untouched.
    while ( my $line = <$config_fh> ) {
        chomp $line;
        next unless length($line) > 1 and $line !~ /^\#/;

        my ($key, $value) = split(/=/, $line, 2);
        if ( $key eq 'sample' ) {
            $samples_ref->[$count] = $value;
            $count++;
        }
        else {
            $$parameters_ref->{$key} = $value;
        }
    }
    close($config_fh);

    my $num_samples = scalar @$samples_ref;
    print "number of samples = $num_samples\n";
    return $num_samples;
}
252 | |
253 | |
# Decide which target-regions file the pipeline uses, building it if needed.
#
# Arguments:
#   $parameters_ref  - reference to the parameters hash reference; reads
#                      control_regions_file, target_regions_file, directory,
#                      path, target_primers, control_primers, refseq_file and
#                      gtf_file
#   $files_ref       - reference to the files hash reference; reads and may
#                      overwrite 'target_regions_file'
#   $file_handle_ref - handle every executed shell command is printed to
#
# Behaviour:
#   * both region files given  -> they are concatenated (cat) into
#                                 $files->{target_regions_file}
#   * only one of them given   -> $files->{target_regions_file} is pointed at it
#   * neither given            -> create_target_regions_file.pl is run for each
#                                 non-empty primers file and the non-empty
#                                 results are combined (control first)
# A setting counts as "not given" when it is the string 'NULL' or undefined.
sub create_target_regions_file {
    my ($parameters_ref, $files_ref, $file_handle_ref) = @_;

    my $params = $$parameters_ref;
    my $files  = $$files_ref;

    # Log a shell command block and run it; empty blocks are skipped.
    my $log_and_run = sub {
        my ($command) = @_;
        return if $command eq '';
        print {$file_handle_ref} $command;
        system($command);
    };

    my $control_regions = $params->{control_regions_file};
    my $target_regions  = $params->{target_regions_file};
    my $have_control    = defined $control_regions && $control_regions ne 'NULL';
    my $have_target     = defined $target_regions  && $target_regions  ne 'NULL';

    if ( $have_control && $have_target ) {
        $log_and_run->("cat $control_regions $target_regions > $files->{target_regions_file}\n");
        return;
    }
    if ($have_target) {
        $files->{target_regions_file} = $target_regions;
        return;
    }
    if ($have_control) {
        $files->{target_regions_file} = $control_regions;
        return;
    }

    # Create Target Regions File from the primer FASTA files
    my $directory   = $params->{directory};
    my $target_file = $directory . "/target_file.dat";
    my $control_file = $directory . "/control_file.dat";
    my @target_temps  = ( $directory . "/target_temp_1.dat",  $directory . "/target_temp_2.dat" );
    my @control_temps = ( $directory . "/control_temp_1.dat", $directory . "/control_temp_2.dat" );
    my $path_to_annotation_script = $params->{path} . "/archer/annotation/";

    my @jobs = (
        # primers key       label           output file    temp files
        [ 'target_primers',  'fusion',       $target_file,  @target_temps ],
        [ 'control_primers', 'housekeeping', $control_file, @control_temps ],
    );
    for my $job (@jobs) {
        my ($primers_key, $label, $output_file, $temp_1, $temp_2) = @$job;
        my $primers = $params->{$primers_key};

        # -s is false for missing as well as for empty files.
        next unless defined $primers && -s $primers;

        $log_and_run->("$params->{path}/create_target_regions_file.pl -target $primers -label $label -refseq $params->{refseq_file} -gtf_file $params->{gtf_file} -path $path_to_annotation_script -t1 $temp_1 -t2 $temp_2 -o $output_file\n");
    }

    # Combine whatever was produced: control regions first, then target
    # regions. A control file that exists but is empty no longer hides a
    # non-empty target file.
    my $combine = '';
    if ( -s $control_file ) {
        $combine = "cp $control_file $files->{target_regions_file}\n";
        if ( -s $target_file ) {
            $combine .= "cat $target_file >> $files->{target_regions_file}\n";
        }
    }
    elsif ( -s $target_file ) {
        $combine = "cp $target_file $files->{target_regions_file}\n";
    }
    $log_and_run->($combine);

    # Remove the intermediate files of every job whose output file exists.
    my $remove_temps = '';
    if ( -e $target_file ) {
        $remove_temps .= "rm $_\n" for @target_temps;
    }
    if ( -e $control_file ) {
        $remove_temps .= "rm $_\n" for @control_temps;
    }
    $log_and_run->($remove_temps);

    # Remove the per-label output files themselves.
    my $remove_outputs = '';
    $remove_outputs .= "rm $target_file\n"  if -e $target_file;
    $remove_outputs .= "rm $control_file\n" if -e $control_file;
    $log_and_run->($remove_outputs);

    print {$file_handle_ref} "\n";
    return;
}
342 | |
343 | |
# Derive read mode, sample tag and FASTQ file names for one sample.
#
# Arguments (all but the last are references that are written through):
#   $samples_array_ref - array of sample strings from the config file
#   $tags_array_ref    - array; replaced by the whitespace-separated words of
#                        the selected sample string
#   $reads_ref         - scalar; set to 'single' (one word) or 'paired'
#   $tag_ref           - scalar; set to the word itself, or to
#                        "<word0>_<word1>" for paired samples
#   $fastq_file_1_ref  - scalar; set to "<directory>/<word0>.fastq"
#   $fastq_file_2_ref  - scalar; set to "<directory>/<word1>.fastq" for paired
#                        samples, left untouched for single samples
#   $parameters_ref    - reference to the parameters hash reference
#                        (supplies 'directory')
#   $file_handle_ref   - handle the tag is logged to
#   $sample_index      - optional; index of the sample to use. When omitted
#                        the file-level loop variable $i is used, which is how
#                        the existing callers select the sample.
#
# The tag is also printed to STDOUT.
sub define_tags {
    my ($samples_array_ref, $tags_array_ref, $reads_ref, $tag_ref,
        $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref,
        $file_handle_ref, $sample_index) = @_;

    $sample_index = $i unless defined $sample_index;

    # Split samples on whitespace
    @$tags_array_ref = split(/\s+/, $samples_array_ref->[$sample_index]);

    my $directory = $$parameters_ref->{directory};

    if ( scalar(@$tags_array_ref) == 1 ) {
        $$reads_ref = 'single';
        # Read the tag through the reference that was passed in rather than
        # through the file-level @tags array.
        $$tag_ref          = $tags_array_ref->[0];
        $$fastq_file_1_ref = $directory . "/" . $tags_array_ref->[0] . ".fastq";
    }
    else {
        $$reads_ref = 'paired';
        $$tag_ref          = $tags_array_ref->[0] . "_" . $tags_array_ref->[1];
        $$fastq_file_1_ref = $directory . "/" . $tags_array_ref->[0] . ".fastq";
        $$fastq_file_2_ref = $directory . "/" . $tags_array_ref->[1] . ".fastq";
    }

    print $$tag_ref, "\n";

    print {$file_handle_ref} $$tag_ref, "\n";
    return;
}
373 | |
374 | |
# Decide whether a sample has usable FASTQ input.
#
# Arguments:
#   $reads_ref        - reference to 'single' or 'paired'
#   $fastq_file_1_ref - reference to the first FASTQ file name
#   $fastq_file_2_ref - reference to the second FASTQ file name (only
#                       consulted for paired samples)
#
# Returns 1 when every required FASTQ file is present and non-empty,
# otherwise 0 (also for any other read mode).
#
# Side effect: a file name that does not exist gets ".gz" appended through
# the reference, so the caller afterwards sees the gzipped name. The second
# file is only examined (and possibly renamed) when the first one is usable.
# A file that exists but is empty is NOT retried with ".gz".
sub decide_to_proceed {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref) = @_;

    # Switch the name to its ".gz" form if the plain file is missing, then
    # report whether the resulting file exists with a non-zero size.
    my $is_usable = sub {
        my ($path_ref) = @_;
        $$path_ref .= ".gz" unless -e $$path_ref;
        return ( -s $$path_ref ) ? 1 : 0;
    };

    if ( $$reads_ref eq 'single' ) {
        return $is_usable->($fastq_file_1_ref);
    }
    if ( $$reads_ref eq 'paired' ) {
        return ( $is_usable->($fastq_file_1_ref) && $is_usable->($fastq_file_2_ref) ) ? 1 : 0;
    }
    return 0;
}
437 | |
438 | |
# Add the FASTQ file(s) of one sample to the space-separated list of files
# that will be aligned together.
#
# Arguments:
#   $proceed_value                 - 1 when the sample should be aligned;
#                                    any other value leaves everything as is
#   $reads_ref                     - reference to 'single' or 'paired'
#   $number_of_alignment_files_ref - reference to the running file count
#   $alignment_string_ref          - reference to the running file list
#   $fastq_file_1_ref              - reference to the first FASTQ file name
#   $fastq_file_2_ref              - reference to the second FASTQ file name
#                                    (only added for paired samples)
#
# While the count is still 0 the list is started afresh, whatever it held.
sub create_alignment_string {
    my ($proceed_value, $reads_ref, $number_of_alignment_files_ref,
        $alignment_string_ref, $fastq_file_1_ref, $fastq_file_2_ref) = @_;

    return unless $proceed_value == 1;

    my @new_files = ($$fastq_file_1_ref);
    push @new_files, $$fastq_file_2_ref if $$reads_ref eq 'paired';

    my @so_far = $$number_of_alignment_files_ref == 0 ? () : ($$alignment_string_ref);

    $$alignment_string_ref = join(" ", @so_far, @new_files);
    $$number_of_alignment_files_ref += scalar(@new_files);
    return;
}
461 | |
462 | |
# Build the shell command block that aligns all collected FASTQ files.
#
# Arguments:
#   $parameters_ref                  - reference to the parameters hash
#                                      reference (supplies 'directory' and
#                                      'reference_file')
#   $number_of_alignment_files_value - how many FASTQ files were collected
#   $alignment_string_value          - the space-separated FASTQ file list
#
# Returns the command text. It always starts with an "echo" line; the
# bwa_enz line is only added when at least one file was collected.
sub align_reads {
    my ($parameters_ref, $number_of_alignment_files_value, $alignment_string_value) = @_;

    my @script = ("echo Align Reads\n");

    if ( $number_of_alignment_files_value > 0 ) {
        my $settings = $$parameters_ref;
        push @script,
            join(" ",
                "bwa_enz mem -Q 0 -m -D",
                $settings->{directory},
                $settings->{reference_file},
                $alignment_string_value)
            . "\n";
    }

    return join("", @script);
}
473 | |
474 | |
# Build the shell command block that moves the aligner's SAM output to the
# pipeline's "full" SAM file names.
#
# Arguments:
#   $reads_ref - reference to 'single' or 'paired'
#   $files_ref - reference to the files hash reference (supplies
#                sam_file_<n>_orig and sam_file_<n>_full)
#
# Returns the command text: an "echo" line, one "mv" for read 1 and, for
# paired samples, one "mv" for read 2.
sub rename_sam_files {
    my ($reads_ref, $files_ref) = @_;

    my $names        = $$files_ref;
    my @read_numbers = $$reads_ref eq 'paired' ? (1, 2) : (1);

    my @script = ("echo Rename SAM Files\n");
    for my $read_number (@read_numbers) {
        my $from = $names->{ "sam_file_" . $read_number . "_orig" };
        my $to   = $names->{ "sam_file_" . $read_number . "_full" };
        push @script, "mv $from $to\n";
    }
    return join("", @script);
}
485 | |
486 | |
# Build the shell command block that turns the full SAM files into filtered
# SAM, BAM and BED files.
#
# Arguments:
#   $reads_ref        - reference to 'single' or 'paired'
#   $fastq_file_1_ref - not used by the current commands
#   $fastq_file_2_ref - not used by the current commands
#   $parameters_ref   - not used by the current commands
#   $files_ref        - reference to the files hash reference
#
# Returns the command text. samtools flags used: -S input is SAM, -b output
# is BAM, -h print header for the SAM output; "-q 40" is passed as the
# quality threshold. For single samples the read-1 BED file is moved to the
# combined BED file; for paired samples both BED files are concatenated into
# the combined file and then removed.
# (The FASTQ files and parameters were only needed by per-sample "bwa mem"
# calls that are no longer part of this step.)
sub alignments {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;

    my $names = $$files_ref;

    # The three conversion commands for one read number.
    my $convert = sub {
        my ($n) = @_;
        my ($full, $sam, $bam, $bed) =
            @{$names}{ "sam_file_${n}_full", "sam_file_${n}", "bam_file_${n}", "bed_file_${n}_orig" };
        return (
            "samtools view -Shq 40 $full > $sam\n",
            "samtools view -bS $sam > $bam\n",
            "bamToBed -i $bam > $bed\n",
        );
    };

    my @script = ( "echo Alignments\n", $convert->(1) );

    if ( $$reads_ref eq 'single' ) {
        push @script, "mv $names->{bed_file_1_orig} $names->{bed_file_combined}\n";
    }
    elsif ( $$reads_ref eq 'paired' ) {
        push @script,
            $convert->(2),
            "cat $names->{bed_file_1_orig} $names->{bed_file_2_orig} > $names->{bed_file_combined}\n",
            "rm $names->{bed_file_1_orig}\n",
            "rm $names->{bed_file_2_orig}\n";
    }

    return join("", @script);
}
517 | |
518 | |
# Build the shell command block that de-duplicates the reads of one sample.
#
# Arguments:
#   $reads_ref        - reference to 'single' or 'paired'
#   $fastq_file_1_ref - reference to the first FASTQ file name
#   $fastq_file_2_ref - reference to the second FASTQ file name
#   $parameters_ref   - reference to the parameters hash reference
#                       (supplies 'path')
#   $files_ref        - reference to the files hash reference
#
# Returns the command text: the combined BED file is sorted on column 4 and
# piped into dedup_pipeline.sh (the second FASTQ file is passed with -2 for
# every read mode other than 'single'); de_dup_2_hash.pl is then run on the
# filtered and the full SAM file of read 1 and, for paired samples, of read 2.
sub de_duplication {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;

    my $tool_dir = $$parameters_ref->{path};
    my $names    = $$files_ref;

    my $fastq_options = "-f $$fastq_file_1_ref";
    if ( $$reads_ref ne 'single' ) {
        $fastq_options .= " -2 $$fastq_file_2_ref";
    }

    my @script = (
        "echo De-duplication\n",
        "sort -k4,4 $names->{bed_file_combined}|$tool_dir/dedup_pipeline.sh -p $tool_dir -b /dev/stdin $fastq_options > $names->{dedup_file}\n",
    );

    my @read_numbers = $$reads_ref eq 'paired' ? (1, 2) : (1);
    for my $n (@read_numbers) {
        for my $variant ( "", "_full" ) {
            my $input  = $names->{"sam_file_${n}${variant}"};
            my $output = $names->{"sam_dedup_file_${n}${variant}"};
            push @script,
                "$tool_dir/de_dup_2_hash.pl -dedup $names->{dedup_file} -sam $input -o $output\n";
        }
    }

    return join("", @script);
}
541 | |
542 | |
# Build the shell command block that separates on-target from off-target
# reads, once for all reads and once for the de-duplicated reads.
#
# Arguments:
#   $reads_ref      - reference to 'single' or 'paired'
#   $parameters_ref - reference to the parameters hash reference (supplies
#                     'path' and 'directory')
#   $files_ref      - reference to the files hash reference
#
# Returns the command text. The two passes run the same command sequence and
# differ only in the "_dedup" infix of the file-name keys they use, so the
# sequence is generated by helpers instead of being written out twice.
#
# Per pass:
#   1. For each read: create a master file from the SAM file, join it,
#      convert it to a BED file with one entry per read, reduce the BED
#      entries to single points (so that only the start of R2 and the end of
#      R1 are intersected with the target regions - this assumes the target
#      regions do not overlap each other) and intersect the points with the
#      target regions.
#   2. Paired only: concatenate both intersect files into a combined one.
#   3. Create SAM files of the on- and off-target reads. The "alone" files
#      judge each read by its own intersect file; for paired samples the
#      plain on-target files contain all reads in which at least one of
#      R1/R2 is on-target and the off-target files contain the rest.
sub select_on_and_off_target_reads {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;

    my $tool_dir  = $$parameters_ref->{path};
    my $names     = $$files_ref;
    my $read_mode = $$reads_ref;
    my $is_paired = $read_mode eq 'paired';

    # Commands that reduce one SAM file to target-region intersections.
    # $infix is '' (all reads) or '_dedup'; $n is the read number.
    my $intersect_commands = sub {
        my ($infix, $n) = @_;
        my $sam       = $names->{"sam${infix}_file_${n}"};
        my $prejoin   = $names->{"full_master${infix}_prejoin_file_${n}"};
        my $master    = $names->{"full_master${infix}_file_${n}"};
        my $bed       = $names->{"bed${infix}_file_${n}"};
        my $points    = $names->{"bed_points${infix}_file_${n}"};
        my $intersect = $names->{"intersect${infix}_file_${n}"};
        return (
            "$tool_dir/generate_master_file_without_annotation.pl -sam $sam -o $prejoin\n",
            "$tool_dir/join_master_file.pl -master $prejoin -o $master\n",
            "$tool_dir/convert_master_file_to_bed.pl -master $master -read $read_mode -tag $n -o $bed\n",
            "$tool_dir/convert_bed_to_single_points.pl -b $bed -read $read_mode -tag $n -o $points\n",
            "intersectBed -a $names->{target_regions_bed_file} -b $points -wa -wb > $intersect\n",
        );
    };

    # Commands that split one SAM file into on- and off-target SAM files.
    # $intersect_key names the intersect file to judge by; $scope is '_alone'
    # or '' and selects which on-/off-target file names are written.
    my $split_commands = sub {
        my ($infix, $n, $intersect_key, $scope) = @_;
        my $sam        = $names->{"sam${infix}_file_${n}"};
        my $on_target  = $names->{"sam${infix}_on_target${scope}_file_${n}"};
        my $off_target = $names->{"sam${infix}_off_target${scope}_file_${n}"};
        my $marked     = $names->{"sam${infix}_file_${n}_marked"};
        return (
            "$tool_dir/generate_on_target_sam_files_hash.pl -sam $sam -i $names->{$intersect_key} -on $on_target\n",
            "$tool_dir/mark_on_target_reads.pl -sam $sam -on $on_target -temp_dir $$parameters_ref->{directory} -o $marked\n",
            "grep -v 'ON_TARGET' $marked > $off_target\n",
        );
    };

    my @script = (
        "echo Select On- and Off-target Reads\n",
        # Create BED file of target regions
        "$tool_dir/convert_target_regions_to_bed.pl -t $names->{target_regions_file} -o $names->{target_regions_bed_file}\n",
    );

    # First pass: all reads. Second pass: de-duplicated reads.
    for my $infix ( "", "_dedup" ) {
        my $intersect_1 = "intersect${infix}_file_1";
        my $intersect_2 = "intersect${infix}_file_2";
        my $combined    = "intersect${infix}_file_combined";

        push @script, $intersect_commands->($infix, 1);
        if ($is_paired) {
            push @script,
                $intersect_commands->($infix, 2),
                "cat $names->{$intersect_1} $names->{$intersect_2} > $names->{$combined}\n";
        }

        # On-/Off-target Alone Read 1
        push @script, $split_commands->($infix, 1, $intersect_1, "_alone");
        if ($is_paired) {
            push @script,
                # On-/Off-target Alone Read 2
                $split_commands->($infix, 2, $intersect_2, "_alone"),
                # On-/Off-target Either Read 1
                $split_commands->($infix, 1, $combined, ""),
                # On-/Off-target Either Read 2
                $split_commands->($infix, 2, $combined, "");
        }
    }

    return join("", @script);
}
626 | |
627 | |
# Build the shell command block that runs generateHistAndStartSiteInfo.sh on
# the de-duplicated SAM file(s) of one sample.
#
# Arguments:
#   $reads_ref      - reference to 'single' or 'paired'
#   $parameters_ref - reference to the parameters hash reference (supplies
#                     'path', 'reference_file' and 'reference_file_index')
#   $files_ref      - reference to the files hash reference (supplies the
#                     sam_dedup, start_site_dedup and coverage_dedup file
#                     names per read)
#
# Returns the command text: an "echo" line, one script call for read 1 and,
# for paired samples, one for read 2.
sub generate_coverage_and_start_sites {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;

    my $settings = $$parameters_ref;
    my $names    = $$files_ref;

    my @read_numbers = (1);
    push @read_numbers, 2 if $$reads_ref eq 'paired';

    my $script = "echo Generate Coverage and Start Sites\n";
    for my $n (@read_numbers) {
        $script .= join(" ",
            "bash",
            "$settings->{path}/generateHistAndStartSiteInfo.sh",
            $names->{"sam_dedup_file_$n"},
            $settings->{reference_file},
            $settings->{reference_file_index},
            $names->{"start_site_dedup_file_$n"},
            $names->{"coverage_dedup_file_$n"},
            $settings->{path},
        ) . "\n";
    }

    return $script;
}
642 | |
643 | |
# Compose the commands that produce the annotated "master" file (one line per
# read, per the original author's note) for read 1 and, for paired reads,
# read 2.  Each read gets three steps: generate_master_file_without_annotation.pl,
# annotate.py, and join_master_file.pl.
#
#   $reads_ref      - reference to the read-type string; 'single' makes read 1
#                     use sam_dedup_on_target_alone_file_1, anything else uses
#                     sam_dedup_on_target_file_1; 'paired' adds read 2
#   $parameters_ref - reference to the parameters hash reference (path, gtf_file)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub generate_master_files {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params    = ${$parameters_ref};
    my $files     = ${$files_ref};
    my $read_type = ${$reads_ref};
    my $path      = $params->{path};

    # Each job is [ read number, key of the SAM file fed to the first step ].
    my $first_sam_key = $read_type eq 'single'
        ? 'sam_dedup_on_target_alone_file_1'
        : 'sam_dedup_on_target_file_1';
    my @jobs = ( [ 1, $first_sam_key ] );
    push @jobs, [ 2, 'sam_dedup_on_target_file_2' ] if $read_type eq 'paired';

    my $cmd_line = "echo Generate Master Files\n";
    for my $job (@jobs) {
        my ( $n, $sam_key ) = @{$job};
        my $unannotated = $files->{"master_dedup_no_annotation_file_$n"};
        my $prejoin     = $files->{"master_dedup_prejoin_file_$n"};
        my $master      = $files->{"master_dedup_file_$n"};

        $cmd_line .= "$path/generate_master_file_without_annotation.pl -sam $files->{$sam_key} -o $unannotated\n";
        $cmd_line .= "python $path/archer/annotation/annotate.py --gtf_file $params->{gtf_file} --coordinate_file $unannotated --outfile $prejoin --chromosome_indices 1,1 --coordinate_indices 5,6\n";
        $cmd_line .= "$path/join_master_file.pl -master $prejoin -o $master\n";
    }

    return $cmd_line;
}
672 | |
# Compose the commands that run select_fusion_reads.pl on the master file of
# read 1 (and read 2 for paired reads) and then produce the combined fusion
# and splice read files: a plain copy for 'single', a concatenation of both
# reads for 'paired'.  Any other read type yields only the read-1 selection.
#
#   $reads_ref      - reference to the read-type string
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub select_fusion_reads {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params    = ${$parameters_ref};
    my $files     = ${$files_ref};
    my $read_type = ${$reads_ref};

    # Builds the select_fusion_reads.pl invocation for read number $n.
    my $selection_for = sub {
        my ($n) = @_;
        return join(
            ' ',
            "$params->{path}/select_fusion_reads.pl",
            '-master', $files->{"master_dedup_file_$n"},
            '-tag',    $n,
            '-o1',     $files->{"one_segment_reads_file_$n"},
            '-os',     $files->{"splice_reads_file_$n"},
            '-of',     $files->{"fusion_reads_file_$n"},
            '-omf',    $files->{"multi_fusion_reads_file_$n"},
        );
    };

    my @commands = ( 'echo Select Fusion Reads', $selection_for->(1) );

    if ( $read_type eq 'single' ) {
        for my $kind (qw(fusion splice)) {
            push @commands,
                'cp ' . $files->{"${kind}_reads_file_1"} . ' ' . $files->{"${kind}_reads_file"};
        }
    }

    if ( $read_type eq 'paired' ) {
        push @commands, $selection_for->(2);
        for my $kind (qw(fusion splice)) {
            push @commands,
                  'cat ' . $files->{"${kind}_reads_file_1"}
                . ' '    . $files->{"${kind}_reads_file_2"}
                . ' > '  . $files->{"${kind}_reads_file"};
        }
    }

    return join( "\n", @commands ) . "\n";
}
694 | |
# Compose the commands that count fusion and splice events and then merge the
# splice evidence into the fusion counts.
#
# Per the original author's notes: the fusion count tallies each fusion pair
# (Gene A Exon X with Gene B Exon Y), takes the median breakpoint coordinate,
# and sorts the output; the splice count omits -limit and -min_occ so that the
# script's defaults apply and all splices are reported.
#
#   $reads_ref      - reference to the read-type string (passed to -read)
#   $parameters_ref - reference to the parameters hash reference (path, gtf_file)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub count_fusions {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};
    my $path   = $params->{path};

    # Fragments shared by both count_fusions.pl invocations.
    my $counter = "$path/count_fusions.pl -t $files->{target_regions_file}";
    my $common  = "-min 30 -gtf $params->{gtf_file} -read ${$reads_ref}";

    my @commands = (
        'echo Count Fusions and Splice Events',

        # Fusion counts, with explicit reporting thresholds.
        "$counter -fr $files->{fusion_reads_file} $common -limit 10 -min_occ 5"
            . " -ob $files->{fusion_counts_bare_file} -o $files->{fusion_counts_file}",

        # Splice counts, relying on the script's default thresholds.
        "$counter -fr $files->{splice_reads_file} $common"
            . " -ob $files->{splice_counts_bare_file} -o $files->{splice_counts_file}",

        # Add splice evidence to the fusion counts.
        "$path/add_splice_to_fusion_counts.pl"
            . " -fcb $files->{fusion_counts_bare_file}"
            . " -scb $files->{splice_counts_bare_file}"
            . " -o $files->{fusion_counts_with_splice_bare_file}"
            . " -om $files->{fusion_counts_with_splice_bare_file_machine}",
    );

    return join( "\n", @commands ) . "\n";
}
712 | |
713 | |
# Compose the two flanking_sequences.pl invocations: one for fusion
# candidates and one for splice events.  For 'single' reads only -fastq_1 is
# passed; for every other read type -fastq_2 is passed as well.
#
#   $reads_ref        - reference to the read-type string
#   $fastq_file_1_ref - reference to the read-1 FASTQ file name
#   $fastq_file_2_ref - reference to the read-2 FASTQ file name
#                       (only dereferenced when the read type is not 'single')
#   $parameters_ref   - reference to the parameters hash reference (path)
#   $files_ref        - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub flanking_sequences {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;
    my $params    = ${$parameters_ref};
    my $files     = ${$files_ref};
    my $read_type = ${$reads_ref};

    my $fastq_options = "-fastq_1 ${$fastq_file_1_ref}";
    if ( $read_type ne 'single' ) {
        $fastq_options .= " -fastq_2 ${$fastq_file_2_ref}";
    }

    # Each job is [ event kind used in the input keys, output file key ].
    my @jobs = (
        [ 'fusion', 'flanking_sequences_file' ],
        [ 'splice', 'flanking_splice_sequences_file' ],
    );

    my @commands = ('echo Flanking Sequences');
    for my $job (@jobs) {
        my ( $kind, $output_key ) = @{$job};
        push @commands, join(
            ' ',
            "$params->{path}/flanking_sequences.pl",
            '-fcb',  $files->{"${kind}_counts_bare_file"},
            '-fr',   $files->{"${kind}_reads_file"},
            '-read', $read_type,
            $fastq_options,
            '-o',    $files->{$output_key},
        );
    }

    return join( "\n", @commands ) . "\n";
}
732 | |
733 | |
# Compose the samtools commands that turn each de-duplicated SAM file into a
# BAM file, sort it, and index the sorted result.  Read 1 is always handled;
# read 2 is added when the read type is 'paired'.
#
#   $reads_ref - reference to the read-type string
#   $files_ref - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub bam_dedup_files {
    my ($reads_ref, $files_ref) = @_;
    my $files        = ${$files_ref};
    my @read_numbers = ( ${$reads_ref} eq 'paired' ) ? ( 1, 2 ) : (1);

    my $cmd_line = "echo BAM Dedup Files\n";
    for my $n (@read_numbers) {
        my $unsorted_bam = $files->{"bam_dedup_file_$n"};

        # Convert, sort (second argument is the output name/prefix key), index.
        $cmd_line .= 'samtools view -bS ' . $files->{"sam_dedup_file_$n"} . " > $unsorted_bam\n";
        $cmd_line .= "samtools sort $unsorted_bam " . $files->{"bam_dedup_sorted_file_${n}_name"} . "\n";
        $cmd_line .= 'samtools index ' . $files->{"bam_dedup_sorted_file_$n"} . "\n";
    }

    return $cmd_line;
}
749 | |
750 | |
# Compose the consensus-sequence commands: batch_pipeline.sh once for the
# fusion flanking sequences and once for the splice flanking sequences, then
# pair_fusion_and_splice_sequences.pl.  For 'single' reads only the read-1
# BAM (-1) and FASTQ (-a) are passed; otherwise read 2 (-2, -b) is included.
#
#   $reads_ref        - reference to the read-type string
#   $fastq_file_1_ref - reference to the read-1 FASTQ file name
#   $fastq_file_2_ref - reference to the read-2 FASTQ file name
#                       (only dereferenced when the read type is not 'single')
#   $tag_ref          - reference to the sample tag passed to -tag
#   $parameters_ref   - reference to the parameters hash reference
#                       (path, reference_file)
#   $files_ref        - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub consensus_sequences {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $tag_ref, $parameters_ref, $files_ref) = @_;
    my $params   = ${$parameters_ref};
    my $files    = ${$files_ref};
    my $pipeline = "$params->{path}/consensus_pipeline";

    my $bam_options   = "-1 $files->{bam_dedup_sorted_file_1}";
    my $fastq_options = "-a ${$fastq_file_1_ref}";
    unless ( ${$reads_ref} eq 'single' ) {
        $bam_options   .= " -2 $files->{bam_dedup_sorted_file_2}";
        $fastq_options .= " -b ${$fastq_file_2_ref}";
    }

    # Each job is [ event kind used in the std out/err keys, regions file key ].
    my @jobs = (
        [ 'fusion', 'flanking_sequences_file' ],
        [ 'splice', 'flanking_splice_sequences_file' ],
    );

    my @commands = ('echo Consensus Sequences');
    for my $job (@jobs) {
        my ( $kind, $regions_key ) = @{$job};
        push @commands, join(
            ' ',
            "bash $pipeline/batch_pipeline.sh",
            $bam_options,
            $fastq_options,
            '-f', $params->{reference_file},
            '-r', $files->{$regions_key},
            '-d', "$pipeline/",
            '-s', $files->{"consensus_${kind}_std_out_file"},
            '-e', $files->{"consensus_${kind}_std_err_file"},
        );
    }

    # Pair fusion candidates with splice sequences.
    push @commands, join(
        ' ',
        "$params->{path}/pair_fusion_and_splice_sequences.pl",
        '-fc',  $files->{fusion_counts_bare_file},
        '-sc',  $files->{splice_counts_bare_file},
        '-tag', ${$tag_ref},
        '-o',   $files->{fusion_and_splice_consensus_file},
    );

    return join( "\n", @commands ) . "\n";
}
775 | |
776 | |
# Compose the commands that sort the on-/off-target SAM files with the Unix
# `sort -k1,1` utility and re-sort the full SAM files via samtools
# (view -bS, sort -n, view -h).  The read-1 "alone" files and read-1 full
# files are always handled; the remaining read-1 files and all read-2 files
# are added when the read type is 'paired'.
#
#   $reads_ref - reference to the read-type string
#   $files_ref - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub sort_sam_files {
    my ($reads_ref, $files_ref) = @_;
    my $files    = ${$files_ref};
    my @commands = ('echo Sort SAM Files');

    # For each key K: sort -k1,1 <K> > <K_linux_sorted>.
    my $linux_sort = sub {
        for my $key (@_) {
            push @commands,
                "sort -k1,1 $files->{$key} > " . $files->{"${key}_linux_sorted"};
        }
    };

    # For each stem S: SAM -> BAM, samtools sort -n into the prefix, BAM -> SAM.
    my $name_sort = sub {
        for my $stem (@_) {
            my $bam = $files->{"bam_$stem"};
            push @commands,
                'samtools view -bS ' . $files->{"sam_$stem"} . " > $bam",
                "samtools sort -n $bam " . $files->{"bam_${stem}_prefix"},
                'samtools view -h ' . $files->{"bam_${stem}_sorted"} . ' > ' . $files->{"sam_${stem}_sorted"};
        }
    };

    $linux_sort->(
        qw(
            sam_on_target_alone_file_1
            sam_off_target_alone_file_1
            sam_dedup_on_target_alone_file_1
            sam_dedup_off_target_alone_file_1
        )
    );
    $name_sort->(qw(file_1_full dedup_file_1_full));

    # TODO (from the original author): make this possible to be reverse only too.
    if ( ${$reads_ref} eq 'paired' ) {
        $linux_sort->(
            qw(
                sam_on_target_file_1
                sam_off_target_file_1
                sam_dedup_on_target_file_1
                sam_dedup_off_target_file_1
                sam_on_target_alone_file_2
                sam_off_target_alone_file_2
                sam_dedup_on_target_alone_file_2
                sam_dedup_off_target_alone_file_2
                sam_on_target_file_2
                sam_off_target_file_2
                sam_dedup_on_target_file_2
                sam_dedup_off_target_file_2
            )
        );
        $name_sort->(qw(file_2_full dedup_file_2_full));
    }

    return join( "\n", @commands ) . "\n";
}
818 | |
819 | |
# Compose the on-target statistics commands: on_target_counts.pl and
# on_target_stats.pl, each once for the raw files and once for the
# de-duplicated files.  The commands are only emitted when the target regions
# file exists and is non-empty AT THE TIME THIS SUB IS CALLED; otherwise only
# the echo banner is returned.
#
#   $reads_ref      - reference to the read-type string; 'single' uses the
#                     read-1 "alone" inputs only, anything else uses the full
#                     set of read-1 and read-2 inputs
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub on_target_stats {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params  = ${$parameters_ref};
    my $files   = ${$files_ref};
    my $banner  = "echo On-target Stats\n";
    my $targets = $files->{target_regions_file};

    # -s is true only for an existing file of non-zero size, which covers the
    # separate "exists" and "non-empty" checks in one test.
    return $banner unless -s $targets;

    # TODO (from the original author): make this possible to be reverse only too.
    my $single = ${$reads_ref} eq 'single';

    # Each entry is [ option name, stem of the sorted SAM file key ].
    my @count_inputs = $single
        ? (
            [ 'on_alone_1',  'on_target_alone_file_1' ],
            [ 'off_alone_1', 'off_target_alone_file_1' ],
        )
        : (
            [ 'on_alone_1',  'on_target_alone_file_1' ],
            [ 'on_alone_2',  'on_target_alone_file_2' ],
            [ 'off_alone_1', 'off_target_alone_file_1' ],
            [ 'off_alone_2', 'off_target_alone_file_2' ],
            [ 'on_1',        'on_target_file_1' ],
            [ 'on_2',        'on_target_file_2' ],
            [ 'off_1',       'off_target_file_1' ],
            [ 'off_2',       'off_target_file_2' ],
        );

    # '' selects the raw files, 'dedup_' the de-duplicated ones.
    my @variants = ( '', 'dedup_' );
    my @commands;

    # Counts of on- and off-target reads.
    for my $variant (@variants) {
        my @options;
        for my $input (@count_inputs) {
            my ( $flag, $stem ) = @{$input};
            push @options, "-$flag " . $files->{"sam_${variant}${stem}_linux_sorted"};
        }
        push @commands, join(
            ' ',
            "$params->{path}/on_target_counts.pl",
            @options,
            '-o', $files->{"on_target_${variant}file"},
        );
    }

    # On-target stats and housekeeping stats.
    for my $variant (@variants) {
        my @options = ( '-t', $targets, '-i1', $files->{"intersect_${variant}file_1"} );
        push @options, '-i2', $files->{"intersect_${variant}file_2"} unless $single;
        push @options,
            '-o',  $files->{"reads_per_exon_${variant}file"},
            '-oh', $files->{"housekeeping_${variant}file"};

        # Only the de-duplicated run also writes a machine-readable file.
        push @options, '-om', $files->{reads_per_exon_dedup_file_machine}
            if $variant eq 'dedup_';

        push @commands, join( ' ', "$params->{path}/on_target_stats.pl", @options );
    }

    return $banner . join( "\n", @commands ) . "\n";
}
853 | |
# Compose the count_reads_and_alignments_v2.py command for the name-sorted
# full SAM files.  'single' passes only -r1; any other read type also
# passes -r2.
#
#   $reads_ref      - reference to the read-type string
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub total_molecule_counts {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my @inputs = ("-r1 $files->{sam_file_1_full_sorted}");
    push @inputs, "-r2 $files->{sam_file_2_full_sorted}"
        unless ${$reads_ref} eq 'single';

    my $counter = "python $params->{path}/count_reads_and_alignments_v2.py";
    my $output  = "-o $files->{total_and_aligned_molecule_count_per_tag_file}";

    return "echo Total Molecule Counts\n"
        . join( ' ', $counter, @inputs, $output ) . "\n";
}
867 | |
868 | |
# Compose the count_reads_and_alignments_v2.py command for the name-sorted
# de-duplicated full SAM files.  'single' passes only -r1; any other read
# type also passes -r2.
#
#   $reads_ref      - reference to the read-type string
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub de_duplicated_molecule_counts {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my @inputs = ("-r1 $files->{sam_dedup_file_1_full_sorted}");
    push @inputs, "-r2 $files->{sam_dedup_file_2_full_sorted}"
        unless ${$reads_ref} eq 'single';

    my $counter = "python $params->{path}/count_reads_and_alignments_v2.py";
    my $output  = "-o $files->{unique_and_aligned_molecule_count_per_tag_file}";

    return "echo De-duplicated Molecule Counts\n"
        . join( ' ', $counter, @inputs, $output ) . "\n";
}
882 | |
883 | |
# Compose the counts_2.pl command that combines the total and unique molecule
# count files with the on-target count files into the counts file and its
# machine-readable counterpart.
#
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub all_molecule_counts {
    my ($parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my $command = join(
        ' ',
        "$params->{path}/counts_2.pl",
        '-tamc', $files->{total_and_aligned_molecule_count_per_tag_file},
        '-uamc', $files->{unique_and_aligned_molecule_count_per_tag_file},
        '-otd',  $files->{on_target_dedup_file},
        '-ot',   $files->{on_target_file},
        '-o',    $files->{counts_file},
        '-om',   $files->{counts_file_machine},
    );

    return "echo All Molecule Counts\n$command\n";
}
891 | |
# Compose the qc_check.pl command, which reads the de-duplicated housekeeping
# file and writes the QC filter file plus its machine-readable counterpart.
#
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub qc_check {
    my ($parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my $command = join(
        ' ',
        "$params->{path}/qc_check.pl",
        '-hd', $files->{housekeeping_dedup_file},
        '-o',  $files->{qc_filter_file},
        '-om', $files->{qc_filter_file_machine},
    );

    return "echo QC Check\n$command\n";
}
899 | |
# Compose the coverage_uniformity.pl command, which reads the de-duplicated
# housekeeping file and writes the coverage uniformity file plus its
# machine-readable counterpart.
#
#   $reads_ref      - reference to the read-type string (passed to -r)
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub coverage_uniformity {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my $command = join(
        ' ',
        "$params->{path}/coverage_uniformity.pl",
        '-hd', $files->{housekeeping_dedup_file},
        '-r',  ${$reads_ref},
        '-o',  $files->{coverage_uniformity_file},
        '-om', $files->{coverage_uniformity_file_machine},
    );

    return "echo Coverage Uniformity\n$command\n";
}
908 | |
909 | |
# Compose the commands that create the per-sample summary file and its
# machine-readable counterpart: summary.pl writes both files, then the QC
# filter, counts, reads-per-exon (de-duplicated) and fusion-counts-with-splice
# files are appended to each with `cat ... >>`.
#
# The reads-per-exon section is only included when that file exists and is
# non-empty AT THE TIME THIS SUB IS CALLED.
# NOTE(review): if the returned commands are executed later than this call,
# that file may not exist yet and the section is silently skipped -- confirm
# against the caller.
#
# Coverage-uniformity output and the PostScript/PDF conversion were disabled
# (commented out) in the original and are not emitted.
#
#   $tag_ref        - reference to the sample tag passed to summary.pl -s
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub summary {
    my ($tag_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my @commands = (
        'echo Summary',
        "$params->{path}/summary.pl -s ${$tag_ref} -o $files->{summary_file} -om $files->{summary_file_machine}",
    );

    # '' selects the human-readable files, '_machine' the machine-readable ones.
    for my $suffix ( '', '_machine' ) {
        my $target   = $files->{"summary_file$suffix"};
        my $per_exon = $files->{"reads_per_exon_dedup_file$suffix"};

        my @sections = (
            $files->{"qc_filter_file$suffix"},
            $files->{"counts_file$suffix"},
        );

        # -s is true only for an existing, non-empty file.
        push @sections, $per_exon if -s $per_exon;
        push @sections, $files->{"fusion_counts_with_splice_bare_file$suffix"};

        push @commands, map { "cat $_ >> $target" } @sections;
    }

    return join( "\n", @commands ) . "\n";
}
944 | |
945 | |
# Compose the clean-up commands: clean_up_flanking_sequences.pl for the two
# flanking-sequence files, followed by one `rm` per intermediate file.  A
# fixed set of files is always removed; a second set is removed only when the
# read type is 'paired'.
#
# Removals that were disabled (commented out) in the original -- the
# flanking-sequence ".tmp~" files, summary_file_ps, and the per-read molecule
# count files -- are not emitted.
#
#   $reads_ref      - reference to the read-type string
#   $parameters_ref - reference to the parameters hash reference (path)
#   $files_ref      - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub clean_up {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    # Keys of the files removed for every read type, in emission order.
    my @always = qw(
        sam_file_1_full
        bam_file_1
        bed_file_1
        bed_file_combined
        sam_dedup_file_1
        sam_dedup_file_1_full
        bam_dedup_file_1
        full_master_prejoin_file_1
        full_master_file_1
        full_master_dedup_prejoin_file_1
        full_master_dedup_file_1
        bed_points_file_1
        bed_points_dedup_file_1
        intersect_file_1
        intersect_dedup_file_1
        sam_on_target_alone_file_1
        sam_off_target_alone_file_1
        master_dedup_no_annotation_file_1
        master_dedup_prejoin_file_1
        master_dedup_file_1
        fusion_reads_file
        fusion_counts_file
        fusion_counts_bare_file
        splice_counts_file
        splice_counts_bare_file
        fusion_counts_with_splice_bare_file
        fusion_counts_with_splice_bare_file_machine
        splice_reads_file
        sam_on_target_alone_file_1_linux_sorted
        sam_off_target_alone_file_1_linux_sorted
        sam_dedup_on_target_alone_file_1_linux_sorted
        sam_dedup_off_target_alone_file_1_linux_sorted
        housekeeping_dedup_file
        on_target_dedup_file
        reads_per_exon_dedup_file
        reads_per_exon_dedup_file_machine
        housekeeping_file
        on_target_file
        reads_per_exon_file
        sam_file_1_marked
        sam_dedup_file_1_marked
        bam_file_1_full
        bam_file_1_full_sorted
        sam_file_1_full_sorted
        total_and_aligned_molecule_count_per_tag_file
        unique_and_aligned_molecule_count_per_tag_file
        bam_dedup_file_1_full
        bam_dedup_file_1_full_sorted
        sam_dedup_file_1_full_sorted
        qc_filter_file
        qc_filter_file_machine
        coverage_uniformity_file
        coverage_uniformity_file_machine
        counts_file
        counts_file_machine
    );

    # Keys of the files removed only for paired reads, in emission order.
    my @paired_only = qw(
        sam_file_2_full
        bam_file_2
        bed_file_2
        sam_dedup_file_2
        sam_dedup_file_2_full
        bam_dedup_file_2
        full_master_prejoin_file_2
        full_master_file_2
        full_master_dedup_prejoin_file_2
        full_master_dedup_file_2
        bed_points_file_2
        bed_points_dedup_file_2
        intersect_file_2
        intersect_dedup_file_2
        intersect_file_combined
        intersect_dedup_file_combined
        sam_dedup_on_target_alone_file_1
        sam_dedup_off_target_alone_file_1
        sam_on_target_file_1
        sam_off_target_file_1
        sam_on_target_file_2
        sam_off_target_file_2
        sam_on_target_alone_file_2
        sam_off_target_alone_file_2
        sam_dedup_on_target_alone_file_2
        sam_dedup_off_target_alone_file_2
        master_dedup_no_annotation_file_2
        master_dedup_prejoin_file_2
        master_dedup_file_2
        sam_on_target_file_1_linux_sorted
        sam_off_target_file_1_linux_sorted
        sam_dedup_on_target_file_1_linux_sorted
        sam_dedup_off_target_file_1_linux_sorted
        sam_on_target_alone_file_2_linux_sorted
        sam_off_target_alone_file_2_linux_sorted
        sam_dedup_on_target_alone_file_2_linux_sorted
        sam_dedup_off_target_alone_file_2_linux_sorted
        sam_on_target_file_2_linux_sorted
        sam_off_target_file_2_linux_sorted
        sam_dedup_on_target_file_2_linux_sorted
        sam_dedup_off_target_file_2_linux_sorted
        sam_file_2_marked
        sam_dedup_file_2_marked
        bam_file_2_full
        bam_file_2_full_sorted
        sam_file_2_full_sorted
        bam_dedup_file_2_full
        bam_dedup_file_2_full_sorted
        sam_dedup_file_2_full_sorted
    );

    my @commands = ('echo Clean Up');

    # Clean up the flanking (fusion) and flanking splice sequence files.
    for my $key (qw(flanking_sequences_file flanking_splice_sequences_file)) {
        push @commands, "$params->{path}/clean_up_flanking_sequences.pl -f $files->{$key}";
    }

    my @doomed = @always;
    push @doomed, @paired_only if ${$reads_ref} eq 'paired';
    push @commands, map { "rm $files->{$_}" } @doomed;

    return join( "\n", @commands ) . "\n";
}
1078 | |
1079 | |
# Compose the summary_for_unprocessed_samples.pl command, which is given the
# read type, both FASTQ file names, and the summary output files.  Both FASTQ
# references are dereferenced regardless of the read type.  The
# PostScript/PDF conversion was disabled (commented out) in the original and
# is not emitted.
#
#   $reads_ref        - reference to the read-type string (passed to -r)
#   $fastq_file_1_ref - reference to the read-1 FASTQ file name (-f1)
#   $fastq_file_2_ref - reference to the read-2 FASTQ file name (-f2)
#   $parameters_ref   - reference to the parameters hash reference (path)
#   $files_ref        - reference to the file-name hash reference
#
# Returns the newline-terminated command text.
sub summary_for_unprocessed_sample {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;
    my $params = ${$parameters_ref};
    my $files  = ${$files_ref};

    my $command = join(
        ' ',
        "$params->{path}/summary_for_unprocessed_samples.pl",
        '-r',  ${$reads_ref},
        '-f1', ${$fastq_file_1_ref},
        '-f2', ${$fastq_file_2_ref},
        '-o',  $files->{summary_file},
        '-om', $files->{summary_file_machine},
    );

    return "echo Summary for Unprocessed Sample\n$command\n";
}
1092 | |
1093 | |
# Compose the join_multisample_output.pl command, which is given the config
# file and the combined output file.
#
#   $parameters_ref - reference to the parameters hash reference
#                     (path, config_file, outputfile are read)
#
# Returns the newline-terminated command text, beginning with an echo banner
# like the other command builders in this file.
sub join_multiple_samples {
    my $parameters_ref = shift;
    my $cmd_line = "echo Join Multiple Samples\n";

    # Append with .= so the banner above is kept; the previous plain
    # assignment overwrote it and the banner was never emitted.
    $cmd_line .= "$$parameters_ref->{path}/join_multisample_output.pl -config $$parameters_ref->{config_file} -o $$parameters_ref->{outputfile}\n";

    return $cmd_line;
}
1100 | |
1101 #sub define_alignment_file_names { | |
1102 # my $reads_ref = shift; | |
1103 # my $tags_array_ref = shift; | |
1104 # my $parameters_ref = shift; | |
1105 # my $files_ref = shift; | |
1106 # $$files_ref->{sam_file_1} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam"; | |
1107 # $$files_ref->{sam_file_1_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam.full"; | |
1108 # if ( $$reads_ref eq 'paired' ) { | |
1109 # $$files_ref->{sam_file_2} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam"; | |
1110 # $$files_ref->{sam_file_2_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam.full"; | |
1111 # } | |
1112 #} | |
1113 | |
# Store the alignment file names in the files hash.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; when the value is 'paired' the read 2 names
#                     are defined as well
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $tag_ref        - scalar ref; used for the combined BED file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_alignments_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Hash-key template (%d is the read number) and the extension that is
    # appended to "<directory>/<read tag>".
    my @per_read_entries = (
        [ 'sam_file_%d_orig', '.fastq.sam' ],
        [ 'sam_file_%d_full', '.sam.full' ],
        [ 'sam_file_%d',      '.sam' ],
        [ 'bam_file_%d',      '.bam' ],
        [ 'bed_file_%d_orig', '.bed.orig' ],
    );

    my $read_count = ( $$reads_ref eq 'paired' ) ? 2 : 1;
    for my $read_number ( 1 .. $read_count ) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        for my $entry (@per_read_entries) {
            my ( $key_template, $extension ) = @{$entry};
            $$files_ref->{ sprintf( $key_template, $read_number ) } = $stem . $extension;
        }
    }

    $$files_ref->{bed_file_combined} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".combined.bed";
}
1134 | |
# Store the de-duplication file names in the files hash.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $tag_ref        - scalar ref; used for the read-id list file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_de_duplication_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    $$files_ref->{dedup_file} = join '', $$parameters_ref->{directory}, '/', $$tag_ref, '.dedup_read_ids.dat';

    # Helper: define both de-duplicated SAM names for one read number.
    my $define_for_read = sub {
        my ($read_number) = @_;
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        $$files_ref->{"sam_dedup_file_${read_number}"}      = $stem . ".sam.dedup";
        $$files_ref->{"sam_dedup_file_${read_number}_full"} = $stem . ".dedup.sam.full";
    };

    $define_for_read->(1);
    $define_for_read->(2) if $$reads_ref eq 'paired';
}
1149 | |
# Store the on-target / off-target file names in the files hash, for the
# plain and the de-duplicated data of each read.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $tag_ref        - scalar ref; used for the combined intersect file names
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_on_and_off_target_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Hash-key template (%d is the read number) and the extension that is
    # appended to "<directory>/<read tag>".
    my @per_read_entries = (
        # Plain
        [ 'full_master_prejoin_file_%d',  '.full.prejoin.master.dat' ],
        [ 'full_master_file_%d',          '.full.master.dat' ],
        [ 'bed_file_%d',                  '.bed' ],
        [ 'bed_points_file_%d',           '.points.bed' ],
        [ 'intersect_file_%d',            '.intersect.dat' ],
        [ 'sam_on_target_alone_file_%d',  '.on_target.alone.sam' ],
        [ 'sam_file_%d_marked',           '.sam.marked' ],
        [ 'sam_off_target_alone_file_%d', '.off_target.alone.sam' ],
        [ 'sam_on_target_file_%d',        '.on_target.sam' ],
        [ 'sam_off_target_file_%d',       '.off_target.sam' ],
        # Dedup
        [ 'full_master_dedup_prejoin_file_%d',  '.full.dedup.prejoin.master.dat' ],
        [ 'full_master_dedup_file_%d',          '.full.dedup.master.dat' ],
        [ 'bed_dedup_file_%d',                  '.dedup.bed' ],
        [ 'bed_points_dedup_file_%d',           '.dedup.points.bed' ],
        [ 'intersect_dedup_file_%d',            '.dedup.intersect.dat' ],
        [ 'sam_dedup_on_target_alone_file_%d',  '.dedup.on_target.alone.sam' ],
        [ 'sam_dedup_file_%d_marked',           '.sam.dedup.marked' ],
        [ 'sam_dedup_off_target_alone_file_%d', '.dedup.off_target.alone.sam' ],
        [ 'sam_dedup_on_target_file_%d',        '.dedup.on_target.sam' ],
        [ 'sam_dedup_off_target_file_%d',       '.dedup.off_target.sam' ],
    );

    # Not tied to a read or a tag.
    $$files_ref->{target_regions_bed_file} = $$parameters_ref->{directory} . "/target_regions.bed";

    my $read_count = ( $$reads_ref eq 'paired' ) ? 2 : 1;
    for my $read_number ( 1 .. $read_count ) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        for my $entry (@per_read_entries) {
            my ( $key_template, $extension ) = @{$entry};
            $$files_ref->{ sprintf( $key_template, $read_number ) } = $stem . $extension;
        }
    }

    # Combined files are named after the sample tag.
    my $tag_stem = $$parameters_ref->{directory} . "/" . $$tag_ref;
    $$files_ref->{intersect_file_combined}       = $tag_stem . ".intersect_combined.dat";
    $$files_ref->{intersect_dedup_file_combined} = $tag_stem . ".dedup.intersect_combined.dat";
}
1208 | |
# Store the de-duplicated start-site and coverage bedgraph file names in the
# files hash.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_coverage_and_start_site_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    my @read_numbers = (1);
    push @read_numbers, 2 if $$reads_ref eq 'paired';

    foreach my $read_number (@read_numbers) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        foreach my $track (qw(start_site coverage)) {
            $$files_ref->{"${track}_dedup_file_${read_number}"} = $stem . ".dedup.${track}.bedgraph";
        }
    }
}
1221 | |
# Store the de-duplicated master file names in the files hash.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_master_files_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    # Hash-key template (%d is the read number) => extension.
    my %extension_for = (
        'master_dedup_no_annotation_file_%d' => '.dedup.no_annotation.master.dat',
        'master_dedup_prejoin_file_%d'       => '.dedup.prejoin.master.dat',
        'master_dedup_file_%d'               => '.dedup.master.dat',
    );

    my $read_count = ( $$reads_ref eq 'paired' ) ? 2 : 1;
    for my $read_number ( 1 .. $read_count ) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        for my $key_template ( keys %extension_for ) {
            $$files_ref->{ sprintf( $key_template, $read_number ) } = $stem . $extension_for{$key_template};
        }
    }
}
1236 | |
# Store the one-segment, splice, fusion and multi-fusion read file names in
# the files hash, plus the two combined per-tag files.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $tag_ref        - scalar ref; used for the combined file names
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_fusion_reads_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Each category gives both the hash key ("<category>_file_<n>") and the
    # file extension (".<category>.dat").
    my @categories = qw(one_segment_reads splice_reads fusion_reads multi_fusion_reads);

    my $read_count = ( $$reads_ref eq 'paired' ) ? 2 : 1;
    for my $read_number ( 1 .. $read_count ) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        for my $category (@categories) {
            $$files_ref->{"${category}_file_${read_number}"} = $stem . ".${category}.dat";
        }
    }

    for my $category (qw(fusion_reads splice_reads)) {
        $$files_ref->{"${category}_file"} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".${category}.combined.dat";
    }
}
1256 | |
# Store the fusion/splice count file names in the files hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_count_fusions_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    my %extension_for = (
        fusion_counts_bare_file                     => '.fusion_counts_bare.dat',
        fusion_counts_file                          => '.fusion_counts.dat',
        splice_counts_bare_file                     => '.splice_counts_bare.dat',
        splice_counts_file                          => '.splice_counts.dat',
        fusion_counts_with_splice_bare_file         => '.fusion_counts_with_splice_bare.dat',
        fusion_counts_with_splice_bare_file_machine => '.fusion_counts_with_splice_bare.machine.dat',
    );

    for my $key ( sort keys %extension_for ) {
        $$files_ref->{$key} = join '', $$parameters_ref->{directory}, '/', $$tag_ref, $extension_for{$key};
    }
}
1268 | |
# Store the flanking-sequence file names in the files hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes both file names
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_flanking_sequences_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    for my $kind (qw(flanking_sequences flanking_splice_sequences)) {
        $$files_ref->{"${kind}_file"} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".${kind}.dat";
    }
}
1276 | |
# Store the consensus-sequence file names in the files hash: the stdout and
# stderr capture files for the fusion and splice consensus steps, and the
# combined FASTA file.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_consensus_sequences_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    for my $event (qw(fusion splice)) {
        for my $stream (qw(std_out std_err)) {
            my $label = "consensus_${event}_${stream}";
            $$files_ref->{"${label}_file"} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".${label}.dat";
        }
    }

    $$files_ref->{fusion_and_splice_consensus_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".fusion_and_splice_consensus_sequences.fasta";
}
1287 | |
# Store the de-duplicated BAM file names in the files hash: the BAM file, the
# sorted output name without extension, and the sorted BAM file.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_bam_dedup_files_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    my $read_count = ( $$reads_ref eq 'paired' ) ? 2 : 1;
    for my $read_number ( 1 .. $read_count ) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read_number - 1 ];
        my %path_for = (
            "bam_dedup_file_${read_number}"             => $stem . ".dedup.bam",
            "bam_dedup_sorted_file_${read_number}_name" => $stem . ".dedup.sorted",
            "bam_dedup_sorted_file_${read_number}"      => $stem . ".dedup.sorted.bam",
        );
        $$files_ref->{$_} = $path_for{$_} for keys %path_for;
    }
}
1302 | |
# Store the names of the sorted SAM/BAM files in the files hash. For each
# read, and for both the plain and the de-duplicated data, this defines:
#   - the "linux_sorted" on-target and off-target SAM files, with and
#     without the "alone" marker;
#   - the full BAM file, its sort prefix, the sorted BAM (prefix + ".bam")
#     and the sorted full SAM file.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 names
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_sort_sam_files_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    # Each variant: [ fragment used in the hash key, fragment used in the file name ].
    my @dedup_variants = ( [ '', '' ], [ '_dedup', '.dedup' ] );
    my @alone_variants = ( [ '_alone', '.alone' ], [ '', '' ] );

    my $read_count = ( $$reads_ref eq 'paired' ) ? 2 : 1;
    for my $read ( 1 .. $read_count ) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $read - 1 ];

        for my $dedup (@dedup_variants) {
            my ( $dedup_key, $dedup_ext ) = @{$dedup};

            # On/off-target SAM files sorted with the linux sort.
            for my $alone (@alone_variants) {
                my ( $alone_key, $alone_ext ) = @{$alone};
                for my $side (qw(on off)) {
                    my $key = "sam${dedup_key}_${side}_target${alone_key}_file_${read}_linux_sorted";
                    $$files_ref->{$key} = $stem . "${dedup_ext}.${side}_target${alone_ext}.linux_sorted.sam";
                }
            }

            # Full BAM/SAM files; the sorted BAM name is derived from the prefix entry.
            my $prefix_key = "bam${dedup_key}_file_${read}_full_prefix";
            $$files_ref->{"bam${dedup_key}_file_${read}_full"}        = $stem . "${dedup_ext}.bam.full";
            $$files_ref->{$prefix_key}                                = $stem . "${dedup_ext}.bam.full.prefix";
            $$files_ref->{"bam${dedup_key}_file_${read}_full_sorted"} = $$files_ref->{$prefix_key} . ".bam";
            $$files_ref->{"sam${dedup_key}_file_${read}_full_sorted"} = $stem . "${dedup_ext}.sam.full.sorted";
        }
    }
}
1349 | |
# Store the on-target statistics file names in the files hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_on_target_stats_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Key / extension pairs. Note that the housekeeping dedup file puts
    # "dedup" after the name, unlike the other dedup files.
    my @entries = (
        on_target_file                    => '.on_target.dat',
        on_target_dedup_file              => '.dedup.on_target.dat',
        reads_per_exon_file               => '.reads_per_exon.dat',
        housekeeping_file                 => '.housekeeping.dat',
        reads_per_exon_dedup_file         => '.dedup.reads_per_exon.dat',
        housekeeping_dedup_file           => '.housekeeping.dedup.dat',
        reads_per_exon_dedup_file_machine => '.dedup.reads_per_exon.machine.dat',
    );

    for ( my $index = 0; $index < @entries; $index += 2 ) {
        my ( $key, $extension ) = @entries[ $index, $index + 1 ];
        $$files_ref->{$key} = $$parameters_ref->{directory} . "/" . $$tag_ref . $extension;
    }
}
1362 | |
# Store the total-and-aligned molecule count file names in the files hash.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 name
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $tag_ref        - scalar ref; used for the per-tag file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_total_molecule_counts_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $extension = ".total_and_aligned_molecule_count.dat";

    $$files_ref->{total_and_aligned_molecule_count_file_1}
        = join( '', $$parameters_ref->{directory}, '/', $tags_array_ref->[0], $extension );
    $$files_ref->{total_and_aligned_molecule_count_file_2}
        = join( '', $$parameters_ref->{directory}, '/', $tags_array_ref->[1], $extension )
        if $$reads_ref eq 'paired';
    $$files_ref->{total_and_aligned_molecule_count_per_tag_file}
        = join( '', $$parameters_ref->{directory}, '/', $$tag_ref, '.per_tag', $extension );
}
1375 | |
# Store the unique-and-aligned molecule count file names in the files hash.
#
# Arguments (all references):
#   $reads_ref      - scalar ref; 'paired' also defines the read 2 name
#   $tags_array_ref - array ref; element 0 names read 1, element 1 names read 2
#   $tag_ref        - scalar ref; used for the per-tag file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_de_deduplicated_molecule_counts_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $extension = ".unique_and_aligned_molecule_count.dat";

    # Per-read files: read 1 always, read 2 only for paired reads.
    my $last_index = ( $$reads_ref eq 'paired' ) ? 1 : 0;
    foreach my $index ( 0 .. $last_index ) {
        my $key = 'unique_and_aligned_molecule_count_file_' . ( $index + 1 );
        $$files_ref->{$key} = $$parameters_ref->{directory} . "/" . $tags_array_ref->[$index] . $extension;
    }

    $$files_ref->{unique_and_aligned_molecule_count_per_tag_file}
        = $$parameters_ref->{directory} . "/" . $$tag_ref . ".per_tag" . $extension;
}
1388 | |
# Store the molecule counts file names (plain and "machine" variants) in the
# files hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes both file names
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_all_molecule_counts_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $stem = $$parameters_ref->{directory} . "/" . $$tag_ref;
    @{ $$files_ref }{qw(counts_file counts_file_machine)}
        = map { $stem . $_ } qw(.counts.dat .counts.machine.dat);
}
1396 | |
1397 | |
# Store the QC filter file names (plain and "machine" variants) in the files
# hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes both file names
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_qc_check_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    my %extension_for = (
        qc_filter_file         => '.qc_filter.dat',
        qc_filter_file_machine => '.qc_filter.machine.dat',
    );
    foreach my $key ( sort keys %extension_for ) {
        $$files_ref->{$key} = $$parameters_ref->{directory} . "/" . $$tag_ref . $extension_for{$key};
    }
}
1405 | |
# Store the coverage uniformity file names (plain and "machine" variants) in
# the files hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes both file names
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_coverage_uniformity_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # '' gives the plain file, 'machine' the machine-readable one.
    for my $variant ( '', 'machine' ) {
        my $key_suffix = $variant eq '' ? '' : "_$variant";
        my $ext_infix  = $variant eq '' ? '' : ".$variant";
        $$files_ref->{"coverage_uniformity_file$key_suffix"}
            = $$parameters_ref->{directory} . "/" . $$tag_ref . ".coverage_uniformity$ext_infix.dat";
    }
}
1413 | |
# Store the summary file names (text, PostScript, PDF and "machine"
# variants) in the files hash.
#
# Arguments (all references):
#   $tag_ref        - scalar ref; the tag that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref; key "directory" is read
#   $files_ref      - ref to the files hash ref; receives the new entries
sub define_summary_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    my %extension_for = (
        summary_file         => '.summary.dat',
        summary_file_ps      => '.summary.ps',
        summary_file_pdf     => '.summary.pdf',
        summary_file_machine => '.summary.machine.dat',
    );

    while ( my ( $key, $extension ) = each %extension_for ) {
        $$files_ref->{$key} = join '', $$parameters_ref->{directory}, '/', $$tag_ref, $extension;
    }
}