#!/usr/bin/perl
#
# archer.pl - driver for the fusion-detection pipeline.
#
# Reads a config file (-config), aligns every sample's FASTQ file(s) with
# bwa_enz in one batch, then runs the per-sample steps (SAM/BAM/BED
# conversion, de-duplication, on/off-target selection, fusion calling,
# statistics, summary, clean-up). Every shell command block is written to
# "<config_file>.log" before it is executed.
#
# Provenance (header of the Mercurial changeset this script arrived in):
#   HG changeset patch
#   User plus
#   Date 1401345175 14400
#   Node ID 3af9b7634b2d738e6324803a8d1d8d468a996d5b
#   Parent d5aed166429d6ec0317019a9cf85258e60dd7587
#   Uploaded
#   diff -r d5aed166429d -r 3af9b7634b2d archer.pl
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/archer.pl Thu May 29 02:32:55 2014 -0400
#   @@ -0,0 +1,1422 @@

use strict;
use warnings;

# NOTE: $i must remain a file-scoped lexical - define_tags() reads it
# directly to pick the current sample.
my($i, $j, $k);
my $parameters = {};

# Print the command-line synopsis and exit with a failure status.
sub usage {
    print "\nUsage: $0 \n\n\t ";
    print "REQUIRED \n\t ";
    print "-config <config_file>\n\n";
    exit(1);
}

# Write a block of shell commands to the log, run it, and report (but do
# not abort on) a failing exit status so the remaining steps still run.
# Returns the raw status from system().
sub run_and_log_command {
    my ($log_fh, $command_block) = @_;
    print {$log_fh} $command_block;
    my $status = system($command_block);
    if ( $status != 0 ) {
        my $detail  = $status == -1 ? "could not be started: $!" : "exited with status " . ($status >> 8);
        my $message = "WARNING: command block $detail\n";
        print {$log_fh} $message;
        warn $message;
    }
    return $status;
}

if ( scalar(@ARGV) == 0 ) {
    usage();
}

# Parse the Command Line
parse_command_line($parameters, @ARGV);
usage() unless defined $parameters->{config_file};

# Log File
my $log_file = $parameters->{config_file} . ".log";
open(my $log_fh, '>', $log_file)
    or die "Cannot open file \"$log_file\" to write to!! ($!)\n";
print {$log_fh} "config = $parameters->{config_file}\n";

# Time Stamp
my $timestamp = localtime(time);
print {$log_fh} $timestamp, "\n";

# Parse Config File
my @samples = ();
my $number_of_samples = parse_config_file(\@samples, \$parameters);
print {$log_fh} "directory = $parameters->{directory}\n\n";

my $files = {};
# Create Target Regions File
$files->{target_regions_file} = $parameters->{directory} . "/target_regions.dat";
create_target_regions_file(\$parameters, \$files, $log_fh);

my @tags = ();
my($reads, $tag);
my($fastq_file_1, $fastq_file_2);
my %proceed = ();
my %resolved_fastq = ();    # sample index => [fastq 1, fastq 2] as resolved by decide_to_proceed
my $alignment_string = '';
my $number_of_alignment_files = 0;
my $cmd = '';

for ($i = 0; $i < $number_of_samples; $i++) {

    # Define Tags
    define_tags(\@samples, \@tags, \$reads, \$tag, \$fastq_file_1, \$fastq_file_2, \$parameters, $log_fh);

    # Check if FASTQ Files Exist and are Non-empty
    $proceed{$i} = decide_to_proceed(\$reads, \$fastq_file_1, \$fastq_file_2);

    # decide_to_proceed may have switched a path to its ".gz" form; remember
    # it, because define_tags resets the paths in the second loop.
    $resolved_fastq{$i} = [ $fastq_file_1, $fastq_file_2 ];

    # Create String of FASTQ Files to be Aligned by bwa_enz
    create_alignment_string($proceed{$i}, \$reads, \$number_of_alignment_files, \$alignment_string, \$fastq_file_1, \$fastq_file_2);
}

# Align the reads with bwa_enz
# Would be better to align paired reads together??
$cmd = align_reads(\$parameters, $number_of_alignment_files, $alignment_string);
run_and_log_command($log_fh, $cmd);

for ($i = 0; $i < $number_of_samples; $i++) {

    # Define Tags
    define_tags(\@samples, \@tags, \$reads, \$tag, \$fastq_file_1, \$fastq_file_2, \$parameters, $log_fh);

    # Define Summary File Names
    define_summary_file_names(\$tag, \$parameters, \$files);

    if ( $proceed{$i} == 1 ) {

        # Use the FASTQ paths that were actually found (possibly gzipped),
        # i.e. the same ones that were handed to the aligner.
        ($fastq_file_1, $fastq_file_2) = @{ $resolved_fastq{$i} };

        # Convert SAM -> BAM -> BED
        define_alignments_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd  = rename_sam_files(\$reads, \$files);
        $cmd .= alignments(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # De-duplicate the SAM File(s)
        define_de_duplication_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = de_duplication(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);
        # What if only reverse read exists? Don't want to de-duplicate?

        # Select On-/Off-Target Reads
        # Split marked files into two files
        define_on_and_off_target_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = select_on_and_off_target_reads(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Coverage and Start Sites
        define_coverage_and_start_site_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = generate_coverage_and_start_sites(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Proceed through the rest of the pipeline using the on-target reads

        # Create Master Files - one line per read
        define_master_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = generate_master_files(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Select Fusion Reads - do not count mapping to a 'novel', i.e., not in refseq, region as a fusion
        define_fusion_reads_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = select_fusion_reads(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Count Fusions
        define_count_fusions_file_names(\$tag, \$parameters, \$files);
        $cmd = count_fusions(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Flanking Sequences
        define_flanking_sequences_file_names(\$tag, \$parameters, \$files);
        $cmd = flanking_sequences(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # BAM Dedup Files
        define_bam_dedup_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = bam_dedup_files(\$reads, \$files);
        run_and_log_command($log_fh, $cmd);

        # Consensus Sequences - Fusion and Splice
        #&define_consensus_sequences_file_names(\$tag, \$parameters, \$files);
        #$cmd = &consensus_sequences(\$reads, \$fastq_file_1, \$fastq_file_2, \$tag, \$parameters, \$files);
        #print LOG_FILE $cmd;
        #system($cmd);

        # Sort SAM Files
        define_sort_sam_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = sort_sam_files(\$reads, \$files);
        run_and_log_command($log_fh, $cmd);

        # On-target Stats
        define_on_target_stats_file_names(\$tag, \$parameters, \$files);
        $cmd = on_target_stats(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Total Molecule Counts
        define_total_molecule_counts_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = total_molecule_counts(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # De-duplicated Molecule Counts
        define_de_deduplicated_molecule_counts_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = de_duplicated_molecule_counts(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # All Molecule Counts
        define_all_molecule_counts_file_names(\$tag, \$parameters, \$files);
        $cmd = all_molecule_counts(\$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # QC Check
        define_qc_check_file_names(\$tag, \$parameters, \$files);
        $cmd = qc_check(\$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Coverage Uniformity
        define_coverage_uniformity_file_names(\$tag, \$parameters, \$files);
        $cmd = coverage_uniformity(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Summary
        $cmd = summary(\$tag, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);

        # Clean Up
        $cmd = clean_up(\$reads, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);
    }
    else {
        # Summary for Unprocessed Sample
        $cmd = summary_for_unprocessed_sample(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        run_and_log_command($log_fh, $cmd);
    }

    # Time Stamp
    $timestamp = localtime(time);
    print {$log_fh} $timestamp, "\n";
}

# Join Multiple Samples
#$cmd = &join_multiple_samples(\$parameters);
#print LOG_FILE $cmd;
#system($cmd);

close($log_fh) or warn "Could not close log file \"$log_file\": $!\n";

exit;
+ +sub parse_command_line { + my($parameters, @ARGV) = @_; + my $next_arg; + while(scalar @ARGV > 0){ + $next_arg = shift(@ARGV); + if($next_arg eq "-config"){ $parameters->{config_file} = shift (@ARGV); } + } +} + + +sub parse_config_file { + my $samples_ref = shift; + my $parameters_ref = shift; + my @values = (); + my $count = 0; + open( FILE, "< $$parameters_ref->{config_file}" ) or die "Can't open $$parameters_ref->{config_file} : $!"; + while( ) { + chomp; + if ( length($_) > 1 and $_ !~ /^\#/ ){ + @values = (); + @values = split(/=/, $_); + if($values[0] eq 'sample'){ + @$samples_ref[$count] = $values[1]; + $count++; + } + else{ + $$parameters_ref->{$values[0]} = $values[1]; + } + } + } + my $num_samples = scalar @$samples_ref; + print "number of samples = $num_samples\n"; + return $num_samples; +} + + +sub create_target_regions_file { + my $parameters_ref = shift; + my $files_ref = shift; + my $file_handle_ref = shift; + my $cmd = ''; + + if ( ($$parameters_ref->{control_regions_file} ne 'NULL') && ($$parameters_ref->{target_regions_file} ne 'NULL') ) { + $cmd = "cat $$parameters_ref->{control_regions_file} $$parameters_ref->{target_regions_file} > $$files_ref->{target_regions_file}\n"; + print $file_handle_ref $cmd; + system($cmd); + } + elsif ( $$parameters_ref->{target_regions_file} ne 'NULL' ) { + $$files_ref->{target_regions_file} = $$parameters_ref->{target_regions_file}; + } + elsif ( $$parameters_ref->{control_regions_file} ne 'NULL' ) { + $$files_ref->{target_regions_file} = $$parameters_ref->{control_regions_file}; + } + else { # Create Target Regions File + my $label; + my $target_file = $$parameters_ref->{directory} . "/target_file.dat"; + my $control_file = $$parameters_ref->{directory} . "/control_file.dat"; + my $target_temp_outputfile_1 = $$parameters_ref->{directory} . "/target_temp_1.dat"; + my $target_temp_outputfile_2 = $$parameters_ref->{directory} . "/target_temp_2.dat"; + my $control_temp_outputfile_1 = $$parameters_ref->{directory} . 
"/control_temp_1.dat"; + my $control_temp_outputfile_2 = $$parameters_ref->{directory} . "/control_temp_2.dat"; + my $path_to_annotation_script = $$parameters_ref->{path} . "/archer/annotation/"; + # Target Primers Fasta File + if ( -e $$parameters_ref->{target_primers} ) { + if ( -s $$parameters_ref->{target_primers} ) { + $label = 'fusion'; + $cmd = "$$parameters_ref->{path}/create_target_regions_file.pl -target $$parameters_ref->{target_primers} -label $label -refseq $$parameters_ref->{refseq_file} -gtf_file $$parameters_ref->{gtf_file} -path $path_to_annotation_script -t1 $target_temp_outputfile_1 -t2 $target_temp_outputfile_2 -o $target_file\n"; + print $file_handle_ref $cmd; + system($cmd); + } + } + # Control Primers Fasta File + if ( -e $$parameters_ref->{control_primers} ) { + if ( -s $$parameters_ref->{control_primers} ) { + $label = 'housekeeping'; + $cmd = "$$parameters_ref->{path}/create_target_regions_file.pl -target $$parameters_ref->{control_primers} -label $label -refseq $$parameters_ref->{refseq_file} -gtf_file $$parameters_ref->{gtf_file} -path $path_to_annotation_script -t1 $control_temp_outputfile_1 -t2 $control_temp_outputfile_2 -o $control_file\n"; + print $file_handle_ref $cmd; + system($cmd); + } + } + if ( -e $control_file ) { + if ( -s $control_file ) { + $cmd = "cp $control_file $$files_ref->{target_regions_file}\n"; + if ( -e $target_file ) { + if ( -s $target_file ) { + $cmd .= "cat $target_file >> $$files_ref->{target_regions_file}\n"; + } + } + print $file_handle_ref $cmd; + system($cmd); + } + } + elsif ( -e $target_file ) { + if ( -s $target_file ) { + $cmd = "cp $target_file $$files_ref->{target_regions_file}\n"; + print $file_handle_ref $cmd; + system($cmd); + } + } + $cmd = ''; + if ( -e $target_file ) { + $cmd .= "rm $target_temp_outputfile_1\n"; + $cmd .= "rm $target_temp_outputfile_2\n"; + } + if ( -e $control_file ) { + $cmd .= "rm $control_temp_outputfile_1\n"; + $cmd .= "rm $control_temp_outputfile_2\n"; + } + print 
$file_handle_ref $cmd; + system($cmd); + + $cmd = ''; + if ( -e $target_file ) { + $cmd .= "rm $target_file\n"; + } + if ( -e $control_file ) { + $cmd .= "rm $control_file\n"; + } + print $file_handle_ref $cmd; + system($cmd); + + print $file_handle_ref "\n"; + } +} + + +sub define_tags { + my $samples_array_ref = shift; + my $tags_array_ref = shift; + my $reads_ref = shift; + my $tag_ref = shift; + my $fastq_file_1_ref = shift; + my $fastq_file_2_ref = shift; + my $parameters_ref = shift; + my $file_handle_ref = shift; + + @$tags_array_ref = (); + @$tags_array_ref = split(/\s+/, @$samples_array_ref[$i]); # Split samples on whitespace + + if( (scalar @$tags_array_ref) == 1 ){ + $$reads_ref = 'single'; + $$tag_ref = $tags[0]; + $$fastq_file_1_ref = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".fastq"; + } + else{ + $$reads_ref = 'paired'; + $$tag_ref = @$tags_array_ref[0] . "_" . @$tags_array_ref[1]; + $$fastq_file_1_ref = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".fastq"; + $$fastq_file_2_ref = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . 
".fastq"; + } + + print $$tag_ref, "\n"; + + print $file_handle_ref $$tag_ref, "\n"; +} + + +sub decide_to_proceed { + my $reads_ref = shift; + my $fastq_file_1_ref = shift; + my $fastq_file_2_ref = shift; + my $proceed = 0; + + if ( $$reads_ref eq 'single' ) { + if ( -e $$fastq_file_1_ref ) { + if ( -s $$fastq_file_1_ref ) { + $proceed = 1; + } + } + else{ + $$fastq_file_1_ref .= ".gz"; # See if fastq_file_1 exists in gzipped form + if ( -e $$fastq_file_1_ref ) { + if ( -s $$fastq_file_1_ref ) { + $proceed = 1; + } + } + } + } + if ( $$reads_ref eq 'paired' ) { + if ( -e $$fastq_file_1_ref ) { + if ( -s $$fastq_file_1_ref ) { + if ( -e $$fastq_file_2_ref ) { + if ( -s $$fastq_file_2_ref ) { + $proceed = 1; + } + } + else{ + $$fastq_file_2_ref .= ".gz"; # See if fastq_file_2 exists in gzipped form + if ( -e $$fastq_file_2_ref ) { + if ( -s $$fastq_file_2_ref ) { + $proceed = 1; + } + } + } + } + } + else{ + $$fastq_file_1_ref .= ".gz"; # See if fastq_file_1 exists in gzipped form + if ( -e $$fastq_file_1_ref ) { + if ( -s $$fastq_file_1_ref ) { + if ( -e $$fastq_file_2_ref ) { + if ( -s $$fastq_file_2_ref ) { + $proceed = 1; + } + } + else{ + $$fastq_file_2_ref .= ".gz"; # See if fastq_file_2 exists in gzipped form + if ( -e $$fastq_file_2_ref ) { + if ( -s $$fastq_file_2_ref ) { + $proceed = 1; + } + } + } + } + } + } + } + return $proceed; +} + + +sub create_alignment_string { + my $proceed_value = shift; + my $reads_ref = shift; + my $number_of_alignment_files_ref = shift; + my $alignment_string_ref = shift; + my $fastq_file_1_ref = shift; + my $fastq_file_2_ref = shift; + + if ( $proceed_value == 1 ) { + if ( $$number_of_alignment_files_ref == 0 ){ + $$alignment_string_ref = $$fastq_file_1_ref; + } + else { + $$alignment_string_ref .= " " . $$fastq_file_1_ref; + } + $$number_of_alignment_files_ref++; + if ( $$reads_ref eq 'paired' ) { + $$alignment_string_ref .= " " . 
$$fastq_file_2_ref; + $$number_of_alignment_files_ref++; + } + } +} + + +sub align_reads { + my $parameters_ref = shift; + my $number_of_alignment_files_value = shift; + my $alignment_string_value = shift; + my $cmd_line = "echo Align Reads\n"; + if ( $number_of_alignment_files_value > 0 ) { + $cmd_line .= "bwa_enz mem -Q 0 -m -D $$parameters_ref->{directory} $$parameters_ref->{reference_file} $alignment_string_value\n"; + } + return $cmd_line; +} + + +sub rename_sam_files { + my $reads_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Rename SAM Files\n"; + $cmd_line .= "mv $$files_ref->{sam_file_1_orig} $$files_ref->{sam_file_1_full}\n"; + if ( $$reads_ref eq 'paired' ) { + $cmd_line .= "mv $$files_ref->{sam_file_2_orig} $$files_ref->{sam_file_2_full}\n"; + } + return $cmd_line; +} + + +sub alignments { + my $reads_ref = shift; + my $fastq_file_1_ref = shift; + my $fastq_file_2_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Alignments\n"; + + # -S input is SAM + # -b output is BAM + # -h print header for the SAM output + +# $cmd_line .= "bwa mem $$parameters_ref->{reference_file} $$fastq_file_1_ref > $$files_ref->{sam_file_1_full}\n"; + $cmd_line .= "samtools view -Shq 40 $$files_ref->{sam_file_1_full} > $$files_ref->{sam_file_1}\n"; + $cmd_line .= "samtools view -bS $$files_ref->{sam_file_1} > $$files_ref->{bam_file_1}\n"; + $cmd_line .= "bamToBed -i $$files_ref->{bam_file_1} > $$files_ref->{bed_file_1_orig}\n"; + if ( $$reads_ref eq 'single' ) { + $cmd_line .= "mv $$files_ref->{bed_file_1_orig} $$files_ref->{bed_file_combined}\n"; + } + elsif ( $$reads_ref eq 'paired' ) { +# $cmd_line .= "bwa mem $$parameters_ref->{reference_file} $$fastq_file_2_ref > $$files_ref->{sam_file_2_full}\n"; + $cmd_line .= "samtools view -Shq 40 $$files_ref->{sam_file_2_full} > $$files_ref->{sam_file_2}\n"; + $cmd_line .= "samtools view -bS $$files_ref->{sam_file_2} > $$files_ref->{bam_file_2}\n"; + $cmd_line .= "bamToBed -i 
$$files_ref->{bam_file_2} > $$files_ref->{bed_file_2_orig}\n"; + $cmd_line .= "cat $$files_ref->{bed_file_1_orig} $$files_ref->{bed_file_2_orig} > $$files_ref->{bed_file_combined}\n"; + $cmd_line .= "rm $$files_ref->{bed_file_1_orig}\n"; + $cmd_line .= "rm $$files_ref->{bed_file_2_orig}\n"; + } + return $cmd_line; +} + + +sub de_duplication { + my $reads_ref = shift; + my $fastq_file_1_ref = shift; + my $fastq_file_2_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo De-duplication\n"; + + if ( $$reads_ref eq 'single' ) { + $cmd_line .= "sort -k4,4 $$files_ref->{bed_file_combined}|$$parameters_ref->{path}/dedup_pipeline.sh -p $$parameters_ref->{path} -b /dev/stdin -f $$fastq_file_1_ref > $$files_ref->{dedup_file}\n"; + } + else{ + $cmd_line .= "sort -k4,4 $$files_ref->{bed_file_combined}|$$parameters_ref->{path}/dedup_pipeline.sh -p $$parameters_ref->{path} -b /dev/stdin -f $$fastq_file_1_ref -2 $$fastq_file_2_ref > $$files_ref->{dedup_file}\n"; + } + $cmd_line .= "$$parameters_ref->{path}/de_dup_2_hash.pl -dedup $$files_ref->{dedup_file} -sam $$files_ref->{sam_file_1} -o $$files_ref->{sam_dedup_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/de_dup_2_hash.pl -dedup $$files_ref->{dedup_file} -sam $$files_ref->{sam_file_1_full} -o $$files_ref->{sam_dedup_file_1_full}\n"; + if ( $$reads_ref eq 'paired' ) { + $cmd_line .= "$$parameters_ref->{path}/de_dup_2_hash.pl -dedup $$files_ref->{dedup_file} -sam $$files_ref->{sam_file_2} -o $$files_ref->{sam_dedup_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/de_dup_2_hash.pl -dedup $$files_ref->{dedup_file} -sam $$files_ref->{sam_file_2_full} -o $$files_ref->{sam_dedup_file_2_full}\n"; + } + return $cmd_line; +} + + +sub select_on_and_off_target_reads { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Select On- and Off-target Reads\n"; + + # Create BED file of target regions + $cmd_line .= 
"$$parameters_ref->{path}/convert_target_regions_to_bed.pl -t $$files_ref->{target_regions_file} -o $$files_ref->{target_regions_bed_file}\n"; + + # All Reads + # Create a master file from the SAM file + # Join the master file + # Select the appropriate segment for each read id and create a BED file - need to have only one entry for each read in the BED file + # Convert BED files of reads to single points - do this so that only start of R2 and end of R1 are counted in overlapping with the target regions so that only the target region that overlaps with these end points will be counted as being hit - assumes that there are no overlapping regions in the target regions file + # Get intersection of reads with target regions + $cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_file_1} -o $$files_ref->{full_master_prejoin_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/join_master_file.pl -master $$files_ref->{full_master_prejoin_file_1} -o $$files_ref->{full_master_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_master_file_to_bed.pl -master $$files_ref->{full_master_file_1} -read $$reads_ref -tag 1 -o $$files_ref->{bed_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_bed_to_single_points.pl -b $$files_ref->{bed_file_1} -read $$reads_ref -tag 1 -o $$files_ref->{bed_points_file_1}\n"; + $cmd_line .= "intersectBed -a $$files_ref->{target_regions_bed_file} -b $$files_ref->{bed_points_file_1} -wa -wb > $$files_ref->{intersect_file_1}\n"; + if ( $$reads_ref eq 'paired' ) { + $cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_file_2} -o $$files_ref->{full_master_prejoin_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/join_master_file.pl -master $$files_ref->{full_master_prejoin_file_2} -o $$files_ref->{full_master_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_master_file_to_bed.pl -master 
$$files_ref->{full_master_file_2} -read $$reads_ref -tag 2 -o $$files_ref->{bed_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_bed_to_single_points.pl -b $$files_ref->{bed_file_2} -read $$reads_ref -tag 2 -o $$files_ref->{bed_points_file_2}\n"; + $cmd_line .= "intersectBed -a $$files_ref->{target_regions_bed_file} -b $$files_ref->{bed_points_file_2} -wa -wb > $$files_ref->{intersect_file_2}\n"; + $cmd_line .= "cat $$files_ref->{intersect_file_1} $$files_ref->{intersect_file_2} > $$files_ref->{intersect_file_combined}\n"; + } + # Create SAM files of the on- and off-target reads. On-target files contain all reads in which at least one of R1/R2 is on-target. Off-target files contain the remaining reads. + # On-/Off-target Alone Read 1 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_file_1} -i $$files_ref->{intersect_file_1} -on $$files_ref->{sam_on_target_alone_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_file_1} -on $$files_ref->{sam_on_target_alone_file_1} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_file_1_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_file_1_marked} > $$files_ref->{sam_off_target_alone_file_1}\n"; + if ( $$reads_ref eq 'paired' ) { + # On-/Off-target Alone Read 2 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_file_2} -i $$files_ref->{intersect_file_2} -on $$files_ref->{sam_on_target_alone_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_file_2} -on $$files_ref->{sam_on_target_alone_file_2} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_file_2_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_file_2_marked} > $$files_ref->{sam_off_target_alone_file_2}\n"; + # On-/Off-target Either Read 1 + $cmd_line .= 
"$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_file_1} -i $$files_ref->{intersect_file_combined} -on $$files_ref->{sam_on_target_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_file_1} -on $$files_ref->{sam_on_target_file_1} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_file_1_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_file_1_marked} > $$files_ref->{sam_off_target_file_1}\n"; + # On-/Off-target Either Read 2 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_file_2} -i $$files_ref->{intersect_file_combined} -on $$files_ref->{sam_on_target_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_file_2} -on $$files_ref->{sam_on_target_file_2} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_file_2_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_file_2_marked} > $$files_ref->{sam_off_target_file_2}\n"; + } + + # De-duplicated Reads + $cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_dedup_file_1} -o $$files_ref->{full_master_dedup_prejoin_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/join_master_file.pl -master $$files_ref->{full_master_dedup_prejoin_file_1} -o $$files_ref->{full_master_dedup_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_master_file_to_bed.pl -master $$files_ref->{full_master_dedup_file_1} -read $$reads_ref -tag 1 -o $$files_ref->{bed_dedup_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_bed_to_single_points.pl -b $$files_ref->{bed_dedup_file_1} -read $$reads_ref -tag 1 -o $$files_ref->{bed_points_dedup_file_1}\n"; + $cmd_line .= "intersectBed -a $$files_ref->{target_regions_bed_file} -b $$files_ref->{bed_points_dedup_file_1} -wa -wb > $$files_ref->{intersect_dedup_file_1}\n"; + if ( $$reads_ref eq 'paired' ) { + 
$cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_dedup_file_2} -o $$files_ref->{full_master_dedup_prejoin_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/join_master_file.pl -master $$files_ref->{full_master_dedup_prejoin_file_2} -o $$files_ref->{full_master_dedup_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_master_file_to_bed.pl -master $$files_ref->{full_master_dedup_file_2} -read $$reads_ref -tag 2 -o $$files_ref->{bed_dedup_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/convert_bed_to_single_points.pl -b $$files_ref->{bed_dedup_file_2} -read $$reads_ref -tag 2 -o $$files_ref->{bed_points_dedup_file_2}\n"; + $cmd_line .= "intersectBed -a $$files_ref->{target_regions_bed_file} -b $$files_ref->{bed_points_dedup_file_2} -wa -wb > $$files_ref->{intersect_dedup_file_2}\n"; + $cmd_line .= "cat $$files_ref->{intersect_dedup_file_1} $$files_ref->{intersect_dedup_file_2} > $$files_ref->{intersect_dedup_file_combined}\n"; + } + # Create SAM files of the on- and off-target reads. On-target files contain all reads in which at least one of R1/R2 is on-target. Off-target files contain the remaining reads. 
+ # On-/Off-target Alone Read 1 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_dedup_file_1} -i $$files_ref->{intersect_dedup_file_1} -on $$files_ref->{sam_dedup_on_target_alone_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_dedup_file_1} -on $$files_ref->{sam_dedup_on_target_alone_file_1} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_dedup_file_1_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_dedup_file_1_marked} > $$files_ref->{sam_dedup_off_target_alone_file_1}\n"; + if ( $$reads_ref eq 'paired' ) { + # On-/Off-target Alone Read 2 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_dedup_file_2} -i $$files_ref->{intersect_dedup_file_2} -on $$files_ref->{sam_dedup_on_target_alone_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_dedup_file_2} -on $$files_ref->{sam_dedup_on_target_alone_file_2} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_dedup_file_2_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_dedup_file_2_marked} > $$files_ref->{sam_dedup_off_target_alone_file_2}\n"; + # On-/Off-target Either Read 1 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam $$files_ref->{sam_dedup_file_1} -i $$files_ref->{intersect_dedup_file_combined} -on $$files_ref->{sam_dedup_on_target_file_1}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_dedup_file_1} -on $$files_ref->{sam_dedup_on_target_file_1} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_dedup_file_1_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_dedup_file_1_marked} > $$files_ref->{sam_dedup_off_target_file_1}\n"; + # On-/Off-target Either Read 2 + $cmd_line .= "$$parameters_ref->{path}/generate_on_target_sam_files_hash.pl -sam 
$$files_ref->{sam_dedup_file_2} -i $$files_ref->{intersect_dedup_file_combined} -on $$files_ref->{sam_dedup_on_target_file_2}\n"; + $cmd_line .= "$$parameters_ref->{path}/mark_on_target_reads.pl -sam $$files_ref->{sam_dedup_file_2} -on $$files_ref->{sam_dedup_on_target_file_2} -temp_dir $$parameters_ref->{directory} -o $$files_ref->{sam_dedup_file_2_marked}\n"; + $cmd_line .= "grep -v 'ON_TARGET' $$files_ref->{sam_dedup_file_2_marked} > $$files_ref->{sam_dedup_off_target_file_2}\n"; + } + return $cmd_line; +} + + +sub generate_coverage_and_start_sites { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Generate Coverage and Start Sites\n"; + + $cmd_line .= "bash $$parameters_ref->{path}/generateHistAndStartSiteInfo.sh $$files_ref->{sam_dedup_file_1} $$parameters_ref->{reference_file} $$parameters_ref->{reference_file_index} $$files_ref->{start_site_dedup_file_1} $$files_ref->{coverage_dedup_file_1} $$parameters_ref->{path}\n"; + + if ( $$reads_ref eq 'paired' ) { + $cmd_line .= "bash $$parameters_ref->{path}/generateHistAndStartSiteInfo.sh $$files_ref->{sam_dedup_file_2} $$parameters_ref->{reference_file} $$parameters_ref->{reference_file_index} $$files_ref->{start_site_dedup_file_2} $$files_ref->{coverage_dedup_file_2} $$parameters_ref->{path}\n"; + } + + return $cmd_line; +} + + +sub generate_master_files { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Generate Master Files\n"; + + # Create a master file of all reads with one line per read + if ( $$reads_ref eq 'single' ) { + $cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_dedup_on_target_alone_file_1} -o $$files_ref->{master_dedup_no_annotation_file_1}\n"; + } + else{ + $cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_dedup_on_target_file_1} -o 
$$files_ref->{master_dedup_no_annotation_file_1}\n";
    }

    # NOTE: the statements down to the next closing brace finish a command
    # builder whose opening lines come before this section of the file, so
    # they are reproduced exactly as they were.
    $cmd_line .= "python $$parameters_ref->{path}/archer/annotation/annotate.py --gtf_file $$parameters_ref->{gtf_file} --coordinate_file $$files_ref->{master_dedup_no_annotation_file_1} --outfile $$files_ref->{master_dedup_prejoin_file_1} --chromosome_indices 1,1 --coordinate_indices 5,6\n";

    $cmd_line .= "$$parameters_ref->{path}/join_master_file.pl -master $$files_ref->{master_dedup_prejoin_file_1} -o $$files_ref->{master_dedup_file_1}\n";

    if ( $$reads_ref eq 'paired' ) {
        $cmd_line .= "$$parameters_ref->{path}/generate_master_file_without_annotation.pl -sam $$files_ref->{sam_dedup_on_target_file_2} -o $$files_ref->{master_dedup_no_annotation_file_2}\n";

        $cmd_line .= "python $$parameters_ref->{path}/archer/annotation/annotate.py --gtf_file $$parameters_ref->{gtf_file} --coordinate_file $$files_ref->{master_dedup_no_annotation_file_2} --outfile $$files_ref->{master_dedup_prejoin_file_2} --chromosome_indices 1,1 --coordinate_indices 5,6\n";

        $cmd_line .= "$$parameters_ref->{path}/join_master_file.pl -master $$files_ref->{master_dedup_prejoin_file_2} -o $$files_ref->{master_dedup_file_2}\n";
    }

    return $cmd_line;
}

# Build the shell commands that run select_fusion_reads.pl on the
# de-duplicated master file of each mate and then combine the per-mate
# fusion and splice read files into the sample-level files.
#   $reads_ref      - reference to the read-type string ('single' or 'paired')
#   $parameters_ref - reference to the parameters hashref (reads {path})
#   $files_ref      - reference to the file-name hashref
# Returns the newline-terminated command text; nothing is executed here.
sub select_fusion_reads {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params   = $$parameters_ref;
    my $files    = $$files_ref;
    my $selector = "$params->{path}/select_fusion_reads.pl";

    my @commands = (
        "echo Select Fusion Reads",
        "$selector -master $files->{master_dedup_file_1} -tag 1 -o1 $files->{one_segment_reads_file_1} -os $files->{splice_reads_file_1} -of $files->{fusion_reads_file_1} -omf $files->{multi_fusion_reads_file_1}",
    );

    if ( $$reads_ref eq 'single' ) {
        # Only one mate: its files simply become the sample-level files.
        push @commands,
            "cp $files->{fusion_reads_file_1} $files->{fusion_reads_file}",
            "cp $files->{splice_reads_file_1} $files->{splice_reads_file}";
    }
    elsif ( $$reads_ref eq 'paired' ) {
        # Second mate is selected as well, then both mates are concatenated.
        push @commands,
            "$selector -master $files->{master_dedup_file_2} -tag 2 -o1 $files->{one_segment_reads_file_2} -os $files->{splice_reads_file_2} -of $files->{fusion_reads_file_2} -omf $files->{multi_fusion_reads_file_2}",
            "cat $files->{fusion_reads_file_1} $files->{fusion_reads_file_2} > $files->{fusion_reads_file}",
            "cat $files->{splice_reads_file_1} $files->{splice_reads_file_2} > $files->{splice_reads_file}";
    }

    return join("\n", @commands) . "\n";
}

# Build the shell commands that count fusion pairs and splice pairs and then
# add the splice evidence to the fusion counts.
#   $reads_ref      - reference to the read-type string (passed on as -read)
#   $parameters_ref - reference to the parameters hashref (reads {path}, {gtf_file})
#   $files_ref      - reference to the file-name hashref
# Returns the newline-terminated command text; nothing is executed here.
sub count_fusions {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = $$parameters_ref;
    my $files  = $$files_ref;

    # Pieces shared by both count_fusions.pl invocations.
    my $counter = "$params->{path}/count_fusions.pl -t $files->{target_regions_file}";
    my $common  = "-min 30 -gtf $params->{gtf_file} -read $$reads_ref";

    my @commands = (
        "echo Count Fusions and Splice Events",

        # Count the number of each type of fusion pair, i.e., Gene A Exon X
        # with Gene B Exon Y, get the median value of the coordinate of each
        # breakpoint, sort and output each type
        "$counter -fr $files->{fusion_reads_file} $common -limit 10 -min_occ 5 -ob $files->{fusion_counts_bare_file} -o $files->{fusion_counts_file}",

        # Count the number of each type of splice pair - Use default values
        # for -limit and -min_occ so that all splices will be reported
        "$counter -fr $files->{splice_reads_file} $common -ob $files->{splice_counts_bare_file} -o $files->{splice_counts_file}",

        # Add splice evidence to fusion counts
        "$params->{path}/add_splice_to_fusion_counts.pl -fcb $files->{fusion_counts_bare_file} -scb $files->{splice_counts_bare_file} -o $files->{fusion_counts_with_splice_bare_file} -om $files->{fusion_counts_with_splice_bare_file_machine}",
    );

    return join("\n", @commands) . "\n";
}


# Build the shell commands that run flanking_sequences.pl once for the fusion
# counts and once for the splice counts.
#   $reads_ref        - reference to the read-type string
#   $fastq_file_1_ref - reference to the first FASTQ file name
#   $fastq_file_2_ref - reference to the second FASTQ file name (only
#                       dereferenced when the read type is not 'single')
#   $parameters_ref   - reference to the parameters hashref (reads {path})
#   $files_ref        - reference to the file-name hashref
# Returns the newline-terminated command text; nothing is executed here.
sub flanking_sequences {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;
    my $params = $$parameters_ref;
    my $files  = $$files_ref;

    # The only difference between the two read types is the extra -fastq_2.
    my $fastq_options = "-fastq_1 $$fastq_file_1_ref";
    $fastq_options .= " -fastq_2 $$fastq_file_2_ref" if $$reads_ref ne 'single';

    my $commands = "echo Flanking Sequences\n";
    for my $job (
        [ 'fusion_counts_bare_file', 'fusion_reads_file', 'flanking_sequences_file' ],
        [ 'splice_counts_bare_file', 'splice_reads_file', 'flanking_splice_sequences_file' ],
    ) {
        my ($counts_key, $reads_key, $output_key) = @$job;
        $commands .= "$params->{path}/flanking_sequences.pl -fcb $files->{$counts_key} -fr $files->{$reads_key} -read $$reads_ref $fastq_options -o $files->{$output_key}\n";
    }

    return $commands;
}


# Build the samtools commands that convert each de-duplicated SAM file to
# BAM, sort it and index the sorted BAM (mate 1 always, mate 2 for paired
# reads).
#   $reads_ref - reference to the read-type string
#   $files_ref - reference to the file-name hashref
# Returns the newline-terminated command text; nothing is executed here.
sub bam_dedup_files {
    my ($reads_ref, $files_ref) = @_;
    my $files    = $$files_ref;
    my @mates    = $$reads_ref eq 'paired' ? (1, 2) : (1);
    my $cmd_line = "echo BAM Dedup Files\n";

    # Make sorted de-dup BAM files
    for my $mate (@mates) {
        my $sam         = $files->{"sam_dedup_file_$mate"};
        my $bam         = $files->{"bam_dedup_file_$mate"};
        my $sorted_name = $files->{"bam_dedup_sorted_file_${mate}_name"};
        my $sorted_bam  = $files->{"bam_dedup_sorted_file_$mate"};

        $cmd_line .= "samtools view -bS $sam > $bam\n";
        $cmd_line .= "samtools sort $bam $sorted_name\n";
        $cmd_line .= "samtools index $sorted_bam\n";
    }

    # The returned expression and this sub's closing brace follow at the
    # start of the next line of the file.
    return
$cmd_line; +} + + +sub consensus_sequences { + my $reads_ref = shift; + my $fastq_file_1_ref = shift; + my $fastq_file_2_ref = shift; + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Consensus Sequences\n"; + + # Fusion and Splice Consensus Sequences + if ( $$reads_ref eq 'single' ) { + $cmd_line .= "bash $$parameters_ref->{path}/consensus_pipeline/batch_pipeline.sh -1 $$files_ref->{bam_dedup_sorted_file_1} -a $$fastq_file_1_ref -f $$parameters_ref->{reference_file} -r $$files_ref->{flanking_sequences_file} -d $$parameters_ref->{path}/consensus_pipeline/ -s $$files_ref->{consensus_fusion_std_out_file} -e $$files_ref->{consensus_fusion_std_err_file}\n"; + $cmd_line .= "bash $$parameters_ref->{path}/consensus_pipeline/batch_pipeline.sh -1 $$files_ref->{bam_dedup_sorted_file_1} -a $$fastq_file_1_ref -f $$parameters_ref->{reference_file} -r $$files_ref->{flanking_splice_sequences_file} -d $$parameters_ref->{path}/consensus_pipeline/ -s $$files_ref->{consensus_splice_std_out_file} -e $$files_ref->{consensus_splice_std_err_file}\n"; + } + else{ + $cmd_line .= "bash $$parameters_ref->{path}/consensus_pipeline/batch_pipeline.sh -1 $$files_ref->{bam_dedup_sorted_file_1} -2 $$files_ref->{bam_dedup_sorted_file_2} -a $$fastq_file_1_ref -b $$fastq_file_2_ref -f $$parameters_ref->{reference_file} -r $$files_ref->{flanking_sequences_file} -d $$parameters_ref->{path}/consensus_pipeline/ -s $$files_ref->{consensus_fusion_std_out_file} -e $$files_ref->{consensus_fusion_std_err_file}\n"; + $cmd_line .= "bash $$parameters_ref->{path}/consensus_pipeline/batch_pipeline.sh -1 $$files_ref->{bam_dedup_sorted_file_1} -2 $$files_ref->{bam_dedup_sorted_file_2} -a $$fastq_file_1_ref -b $$fastq_file_2_ref -f $$parameters_ref->{reference_file} -r $$files_ref->{flanking_splice_sequences_file} -d $$parameters_ref->{path}/consensus_pipeline/ -s $$files_ref->{consensus_splice_std_out_file} -e $$files_ref->{consensus_splice_std_err_file}\n"; + } + + 
# Pair Fusion Candidates with Splice Sequences + $cmd_line .= "$$parameters_ref->{path}/pair_fusion_and_splice_sequences.pl -fc $$files_ref->{fusion_counts_bare_file} -sc $$files_ref->{splice_counts_bare_file} -tag $$tag_ref -o $$files_ref->{fusion_and_splice_consensus_file}\n"; + + return $cmd_line; +} + + +sub sort_sam_files { + my $reads_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Sort SAM Files\n"; + + $cmd_line .= "sort -k1,1 $$files_ref->{sam_on_target_alone_file_1} > $$files_ref->{sam_on_target_alone_file_1_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_off_target_alone_file_1} > $$files_ref->{sam_off_target_alone_file_1_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_on_target_alone_file_1} > $$files_ref->{sam_dedup_on_target_alone_file_1_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_off_target_alone_file_1} > $$files_ref->{sam_dedup_off_target_alone_file_1_linux_sorted}\n"; + + $cmd_line .= "samtools view -bS $$files_ref->{sam_file_1_full} > $$files_ref->{bam_file_1_full}\n"; + $cmd_line .= "samtools sort -n $$files_ref->{bam_file_1_full} $$files_ref->{bam_file_1_full_prefix}\n"; + $cmd_line .= "samtools view -h $$files_ref->{bam_file_1_full_sorted} > $$files_ref->{sam_file_1_full_sorted}\n"; + $cmd_line .= "samtools view -bS $$files_ref->{sam_dedup_file_1_full} > $$files_ref->{bam_dedup_file_1_full}\n"; + $cmd_line .= "samtools sort -n $$files_ref->{bam_dedup_file_1_full} $$files_ref->{bam_dedup_file_1_full_prefix}\n"; + $cmd_line .= "samtools view -h $$files_ref->{bam_dedup_file_1_full_sorted} > $$files_ref->{sam_dedup_file_1_full_sorted}\n"; + + if ( $$reads_ref eq 'paired' ) { # Need to make this possible to be reverse only too + + $cmd_line .= "sort -k1,1 $$files_ref->{sam_on_target_file_1} > $$files_ref->{sam_on_target_file_1_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_off_target_file_1} > $$files_ref->{sam_off_target_file_1_linux_sorted}\n"; + $cmd_line 
.= "sort -k1,1 $$files_ref->{sam_dedup_on_target_file_1} > $$files_ref->{sam_dedup_on_target_file_1_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_off_target_file_1} > $$files_ref->{sam_dedup_off_target_file_1_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_on_target_alone_file_2} > $$files_ref->{sam_on_target_alone_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_off_target_alone_file_2} > $$files_ref->{sam_off_target_alone_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_on_target_alone_file_2} > $$files_ref->{sam_dedup_on_target_alone_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_off_target_alone_file_2} > $$files_ref->{sam_dedup_off_target_alone_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_on_target_file_2} > $$files_ref->{sam_on_target_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_off_target_file_2} > $$files_ref->{sam_off_target_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_on_target_file_2} > $$files_ref->{sam_dedup_on_target_file_2_linux_sorted}\n"; + $cmd_line .= "sort -k1,1 $$files_ref->{sam_dedup_off_target_file_2} > $$files_ref->{sam_dedup_off_target_file_2_linux_sorted}\n"; + + $cmd_line .= "samtools view -bS $$files_ref->{sam_file_2_full} > $$files_ref->{bam_file_2_full}\n"; + $cmd_line .= "samtools sort -n $$files_ref->{bam_file_2_full} $$files_ref->{bam_file_2_full_prefix}\n"; + $cmd_line .= "samtools view -h $$files_ref->{bam_file_2_full_sorted} > $$files_ref->{sam_file_2_full_sorted}\n"; + $cmd_line .= "samtools view -bS $$files_ref->{sam_dedup_file_2_full} > $$files_ref->{bam_dedup_file_2_full}\n"; + $cmd_line .= "samtools sort -n $$files_ref->{bam_dedup_file_2_full} $$files_ref->{bam_dedup_file_2_full_prefix}\n"; + $cmd_line .= "samtools view -h $$files_ref->{bam_dedup_file_2_full_sorted} > $$files_ref->{sam_dedup_file_2_full_sorted}\n"; + } + return 
$cmd_line; +} + + +sub on_target_stats { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo On-target Stats\n"; + + if ( -e $$files_ref->{target_regions_file} ) { + if ( -s $$files_ref->{target_regions_file} ) { + + if ( $$reads_ref eq 'single' ) { # Need to make this possible to be reverse only too + + # Counts of on- and off-target reads + $cmd_line .= "$$parameters_ref->{path}/on_target_counts.pl -on_alone_1 $$files_ref->{sam_on_target_alone_file_1_linux_sorted} -off_alone_1 $$files_ref->{sam_off_target_alone_file_1_linux_sorted} -o $$files_ref->{on_target_file}\n"; + $cmd_line .= "$$parameters_ref->{path}/on_target_counts.pl -on_alone_1 $$files_ref->{sam_dedup_on_target_alone_file_1_linux_sorted} -off_alone_1 $$files_ref->{sam_dedup_off_target_alone_file_1_linux_sorted} -o $$files_ref->{on_target_dedup_file}\n"; + + # On-target Stats and Housekeeping Stats + $cmd_line .= "$$parameters_ref->{path}/on_target_stats.pl -t $$files_ref->{target_regions_file} -i1 $$files_ref->{intersect_file_1} -o $$files_ref->{reads_per_exon_file} -oh $$files_ref->{housekeeping_file}\n"; + $cmd_line .= "$$parameters_ref->{path}/on_target_stats.pl -t $$files_ref->{target_regions_file} -i1 $$files_ref->{intersect_dedup_file_1} -o $$files_ref->{reads_per_exon_dedup_file} -oh $$files_ref->{housekeeping_dedup_file} -om $$files_ref->{reads_per_exon_dedup_file_machine}\n"; + } + else{ + # Counts of on- and off-target reads + $cmd_line .= "$$parameters_ref->{path}/on_target_counts.pl -on_alone_1 $$files_ref->{sam_on_target_alone_file_1_linux_sorted} -on_alone_2 $$files_ref->{sam_on_target_alone_file_2_linux_sorted} -off_alone_1 $$files_ref->{sam_off_target_alone_file_1_linux_sorted} -off_alone_2 $$files_ref->{sam_off_target_alone_file_2_linux_sorted} -on_1 $$files_ref->{sam_on_target_file_1_linux_sorted} -on_2 $$files_ref->{sam_on_target_file_2_linux_sorted} -off_1 $$files_ref->{sam_off_target_file_1_linux_sorted} -off_2 
$$files_ref->{sam_off_target_file_2_linux_sorted} -o $$files_ref->{on_target_file}\n"; + $cmd_line .= "$$parameters_ref->{path}/on_target_counts.pl -on_alone_1 $$files_ref->{sam_dedup_on_target_alone_file_1_linux_sorted} -on_alone_2 $$files_ref->{sam_dedup_on_target_alone_file_2_linux_sorted} -off_alone_1 $$files_ref->{sam_dedup_off_target_alone_file_1_linux_sorted} -off_alone_2 $$files_ref->{sam_dedup_off_target_alone_file_2_linux_sorted} -on_1 $$files_ref->{sam_dedup_on_target_file_1_linux_sorted} -on_2 $$files_ref->{sam_dedup_on_target_file_2_linux_sorted} -off_1 $$files_ref->{sam_dedup_off_target_file_1_linux_sorted} -off_2 $$files_ref->{sam_dedup_off_target_file_2_linux_sorted} -o $$files_ref->{on_target_dedup_file}\n"; + + # On-target Stats and Housekeeping Stats + $cmd_line .= "$$parameters_ref->{path}/on_target_stats.pl -t $$files_ref->{target_regions_file} -i1 $$files_ref->{intersect_file_1} -i2 $$files_ref->{intersect_file_2} -o $$files_ref->{reads_per_exon_file} -oh $$files_ref->{housekeeping_file}\n"; + + $cmd_line .= "$$parameters_ref->{path}/on_target_stats.pl -t $$files_ref->{target_regions_file} -i1 $$files_ref->{intersect_dedup_file_1} -i2 $$files_ref->{intersect_dedup_file_2} -o $$files_ref->{reads_per_exon_dedup_file} -oh $$files_ref->{housekeeping_dedup_file} -om $$files_ref->{reads_per_exon_dedup_file_machine}\n"; + } + } + } + return $cmd_line; +} + +sub total_molecule_counts { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Total Molecule Counts\n"; + if ( $$reads_ref eq 'single' ) { + $cmd_line .= "python $$parameters_ref->{path}/count_reads_and_alignments_v2.py -r1 $$files_ref->{sam_file_1_full_sorted} -o $$files_ref->{total_and_aligned_molecule_count_per_tag_file}\n"; + } + else { + $cmd_line .= "python $$parameters_ref->{path}/count_reads_and_alignments_v2.py -r1 $$files_ref->{sam_file_1_full_sorted} -r2 $$files_ref->{sam_file_2_full_sorted} -o 
$$files_ref->{total_and_aligned_molecule_count_per_tag_file}\n"; + } + return $cmd_line; +} + + +sub de_duplicated_molecule_counts { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo De-duplicated Molecule Counts\n"; + if ( $$reads_ref eq 'single' ) { + $cmd_line .= "python $$parameters_ref->{path}/count_reads_and_alignments_v2.py -r1 $$files_ref->{sam_dedup_file_1_full_sorted} -o $$files_ref->{unique_and_aligned_molecule_count_per_tag_file}\n"; + } + else { + $cmd_line .= "python $$parameters_ref->{path}/count_reads_and_alignments_v2.py -r1 $$files_ref->{sam_dedup_file_1_full_sorted} -r2 $$files_ref->{sam_dedup_file_2_full_sorted} -o $$files_ref->{unique_and_aligned_molecule_count_per_tag_file}\n"; + } + return $cmd_line; +} + + +sub all_molecule_counts { + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo All Molecule Counts\n"; + $cmd_line .= "$$parameters_ref->{path}/counts_2.pl -tamc $$files_ref->{total_and_aligned_molecule_count_per_tag_file} -uamc $$files_ref->{unique_and_aligned_molecule_count_per_tag_file} -otd $$files_ref->{on_target_dedup_file} -ot $$files_ref->{on_target_file} -o $$files_ref->{counts_file} -om $$files_ref->{counts_file_machine}\n"; + return $cmd_line; +} + +sub qc_check { + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo QC Check\n"; + $cmd_line .= "$$parameters_ref->{path}/qc_check.pl -hd $$files_ref->{housekeeping_dedup_file} -o $$files_ref->{qc_filter_file} -om $$files_ref->{qc_filter_file_machine}\n"; + return $cmd_line; +} + +sub coverage_uniformity { + my $reads_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + my $cmd_line = "echo Coverage Uniformity\n"; + $cmd_line .= "$$parameters_ref->{path}/coverage_uniformity.pl -hd $$files_ref->{housekeeping_dedup_file} -r $$reads_ref -o $$files_ref->{coverage_uniformity_file} -om $$files_ref->{coverage_uniformity_file_machine}\n"; + return $cmd_line; +} + + +sub 
summary {
    # Build the shell commands that create the per-sample summary file and
    # its machine-readable twin and append the QC, counts, reads-per-exon and
    # fusion sections to each.  (The `sub` keyword of this definition sits at
    # the end of the preceding line of the file.)
    #   $tag_ref        - reference to the sample tag (passed on as -s)
    #   $parameters_ref - reference to the parameters hashref (reads {path})
    #   $files_ref      - reference to the file-name hashref
    # Returns the newline-terminated command text; nothing is executed here.
    my ($tag_ref, $parameters_ref, $files_ref) = @_;
    my $cmd_line = "echo Summary\n";

#    my $sample_name = "'" . @$samples_array_ref[$sample_element_number] . "'";

    $cmd_line .= "$$parameters_ref->{path}/summary.pl -s $$tag_ref -o $$files_ref->{summary_file} -om $$files_ref->{summary_file_machine}\n";

    # Steps that were already disabled in this sub and stay disabled:
#    $cmd_line .= "cat $$files_ref->{coverage_uniformity_file} >> $$files_ref->{summary_file}\n";
#    $cmd_line .= "enscript -f Courier8 -p $$files_ref->{summary_file_ps} $$files_ref->{summary_file}\n";
#    $cmd_line .= "ps2pdf $$files_ref->{summary_file_ps} $$files_ref->{summary_file_pdf}\n";
#    $cmd_line .= "cat $$files_ref->{coverage_uniformity_file_machine} >> $$files_ref->{summary_file_machine}\n";

    # First pass fills the human-readable summary, second pass the
    # Machine Readable Summary File; the key names differ only by suffix.
    for my $suffix ('', '_machine') {
        my $summary_file = $$files_ref->{"summary_file$suffix"};
        for my $section (qw(qc_filter_file counts_file reads_per_exon_dedup_file fusion_counts_with_splice_bare_file)) {
            my $section_file = $$files_ref->{"$section$suffix"};

            # The reads-per-exon section is appended only when its file
            # already holds data; -s is false for a missing file as well, so
            # no separate -e test is needed.
            next if $section eq 'reads_per_exon_dedup_file' && !( -s $section_file );

            $cmd_line .= "cat $section_file >> $summary_file\n";
        }
    }

    return $cmd_line;
}


# Build the shell commands that tidy the flanking sequence files and remove
# the intermediate files of one sample.
#   $reads_ref      - reference to the read-type string
#   $parameters_ref - reference to the parameters hashref (reads {path})
#   $files_ref      - reference to the file-name hashref
# Returns the newline-terminated command text; nothing is executed here.
sub clean_up {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $cmd_line = "echo Clean Up\n";

    # Clean up Flanking Sequences Files and Flanking Splice Sequences Files
    for my $key (qw(flanking_sequences_file flanking_splice_sequences_file)) {
        $cmd_line .= "$$parameters_ref->{path}/clean_up_flanking_sequences.pl -f $$files_ref->{$key}\n";
    }

    # Removals that were already disabled in this sub and stay disabled:
    # the ".tmp~" companions of the two flanking sequence files,
    # total_and_aligned_molecule_count_file_1/_2,
    # unique_and_aligned_molecule_count_file_1/_2 and summary_file_ps.

    # Files removed for every sample, in removal order.
    my @doomed_keys = qw(
        sam_file_1_full
        bam_file_1
        bed_file_1
        bed_file_combined
        sam_dedup_file_1
        sam_dedup_file_1_full
        bam_dedup_file_1
        full_master_prejoin_file_1
        full_master_file_1
        full_master_dedup_prejoin_file_1
        full_master_dedup_file_1
        bed_points_file_1
        bed_points_dedup_file_1
        intersect_file_1
        intersect_dedup_file_1
        sam_on_target_alone_file_1
        sam_off_target_alone_file_1
        master_dedup_no_annotation_file_1
        master_dedup_prejoin_file_1
        master_dedup_file_1
        fusion_reads_file
        fusion_counts_file
        fusion_counts_bare_file
        splice_counts_file
        splice_counts_bare_file
        fusion_counts_with_splice_bare_file
        fusion_counts_with_splice_bare_file_machine
        splice_reads_file
        sam_on_target_alone_file_1_linux_sorted
        sam_off_target_alone_file_1_linux_sorted
        sam_dedup_on_target_alone_file_1_linux_sorted
        sam_dedup_off_target_alone_file_1_linux_sorted
        housekeeping_dedup_file
        on_target_dedup_file
        reads_per_exon_dedup_file
        reads_per_exon_dedup_file_machine
        housekeeping_file
        on_target_file
        reads_per_exon_file
        sam_file_1_marked
        sam_dedup_file_1_marked
        bam_file_1_full
        bam_file_1_full_sorted
        sam_file_1_full_sorted
        total_and_aligned_molecule_count_per_tag_file
        unique_and_aligned_molecule_count_per_tag_file
        bam_dedup_file_1_full
        bam_dedup_file_1_full_sorted
        sam_dedup_file_1_full_sorted
        qc_filter_file
        qc_filter_file_machine
        coverage_uniformity_file
        coverage_uniformity_file_machine
        counts_file
        counts_file_machine
    );

    # Additional files removed for paired reads only.
    # NOTE(review): sam_dedup_on_target_alone_file_1 and
    # sam_dedup_off_target_alone_file_1 are only removed here, i.e. never for
    # single reads - confirm that this is intended.
    if ( $$reads_ref eq 'paired' ) {
        push @doomed_keys, qw(
            sam_file_2_full
            bam_file_2
            bed_file_2
            sam_dedup_file_2
            sam_dedup_file_2_full
            bam_dedup_file_2
            full_master_prejoin_file_2
            full_master_file_2
            full_master_dedup_prejoin_file_2
            full_master_dedup_file_2
            bed_points_file_2
            bed_points_dedup_file_2
            intersect_file_2
            intersect_dedup_file_2
            intersect_file_combined
            intersect_dedup_file_combined
            sam_dedup_on_target_alone_file_1
            sam_dedup_off_target_alone_file_1
            sam_on_target_file_1
            sam_off_target_file_1
            sam_on_target_file_2
            sam_off_target_file_2
            sam_on_target_alone_file_2
            sam_off_target_alone_file_2
            sam_dedup_on_target_alone_file_2
            sam_dedup_off_target_alone_file_2
            master_dedup_no_annotation_file_2
            master_dedup_prejoin_file_2
            master_dedup_file_2
            sam_on_target_file_1_linux_sorted
            sam_off_target_file_1_linux_sorted
            sam_dedup_on_target_file_1_linux_sorted
            sam_dedup_off_target_file_1_linux_sorted
            sam_on_target_alone_file_2_linux_sorted
            sam_off_target_alone_file_2_linux_sorted
            sam_dedup_on_target_alone_file_2_linux_sorted
            sam_dedup_off_target_alone_file_2_linux_sorted
            sam_on_target_file_2_linux_sorted
            sam_off_target_file_2_linux_sorted
            sam_dedup_on_target_file_2_linux_sorted
            sam_dedup_off_target_file_2_linux_sorted
            sam_file_2_marked
            sam_dedup_file_2_marked
            bam_file_2_full
            bam_file_2_full_sorted
            sam_file_2_full_sorted
            bam_dedup_file_2_full
            bam_dedup_file_2_full_sorted
            sam_dedup_file_2_full_sorted
        );
    }

    for my $key (@doomed_keys) {
        $cmd_line .= "rm $$files_ref->{$key}\n";
    }

    return $cmd_line;
}


# Build the command that writes the summary files for a sample that was not
# processed.
#   $reads_ref        - reference to the read-type string (passed on as -r)
#   $fastq_file_1_ref - reference to the first FASTQ file name (-f1)
#   $fastq_file_2_ref - reference to the second FASTQ file name (-f2)
#   $parameters_ref   - reference to the parameters hashref (reads {path})
#   $files_ref        - reference to the file-name hashref
# Returns the newline-terminated command text; nothing is executed here.
# NOTE(review): -f2 is emitted for every read type; presumably
# summary_for_unprocessed_samples.pl copes with an empty value for single
# reads - confirm.
sub summary_for_unprocessed_sample {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;
    my $cmd_line = "echo Summary for Unprocessed Sample\n";
    $cmd_line .= "$$parameters_ref->{path}/summary_for_unprocessed_samples.pl -r $$reads_ref -f1 $$fastq_file_1_ref -f2 $$fastq_file_2_ref -o $$files_ref->{summary_file} -om $$files_ref->{summary_file_machine}\n";
#    $cmd_line .= "enscript -f Courier8 -p $$files_ref->{summary_file_ps} $$files_ref->{summary_file}\n";
#    $cmd_line .= "ps2pdf $$files_ref->{summary_file_ps} $$files_ref->{summary_file_pdf}\n";
    return $cmd_line;
}


# Build the command that joins the output of all samples named in the config
# file into the configured output file.
#   $parameters_ref - reference to the parameters hashref (reads {path},
#                     {config_file}, {outputfile})
# Returns the newline-terminated command text; nothing is executed here.
sub join_multiple_samples {
    my ($parameters_ref) = @_;
    my $cmd_line = "echo Join Multiple Samples\n";

    # Append (.=) rather than assign: a plain assignment here used to throw
    # away the echo banner that every other command builder emits.
    $cmd_line .= "$$parameters_ref->{path}/join_multisample_output.pl -config $$parameters_ref->{config_file} -o $$parameters_ref->{outputfile}\n";
    return $cmd_line;
}

#sub define_alignment_file_names {
#    my $reads_ref = shift;
#    my $tags_array_ref = shift;
#    my $parameters_ref = shift;
#    my $files_ref = shift;
#    $$files_ref->{sam_file_1} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam";
#    $$files_ref->{sam_file_1_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam.full";
#    if ( $$reads_ref eq 'paired' ) {
#        $$files_ref->{sam_file_2} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam";
#        $$files_ref->{sam_file_2_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam.full";
#    }
#}

# Private helper for the define_*_file_names subs: for mate 1, and for mate 2
# when the read type is 'paired', store "<directory>/<mate tag><suffix>" in
# the files hash.  Each entry of @key_suffix_pairs is [ key template, suffix ];
# the %d in the key template is replaced by the mate number, and the mate tag
# is element (mate number - 1) of the tags array.
sub _define_mate_file_names {
    my ($reads_ref, $tags_array_ref, $parameters_ref, $files_ref, @key_suffix_pairs) = @_;
    my $last_mate = $$reads_ref eq 'paired' ? 2 : 1;

    for my $mate (1 .. $last_mate) {
        my $stem = $$parameters_ref->{directory} . "/" . $tags_array_ref->[ $mate - 1 ];
        for my $pair (@key_suffix_pairs) {
            my ($key_template, $suffix) = @$pair;
            $$files_ref->{ sprintf($key_template, $mate) } = $stem . $suffix;
        }
    }
    return;
}

# Private helper for the define_*_file_names subs: store
# "<directory>/<sample tag><suffix>" in the files hash for each
# [ key, suffix ] entry of @key_suffix_pairs.
sub _define_sample_file_names {
    my ($tag_ref, $parameters_ref, $files_ref, @key_suffix_pairs) = @_;
    my $stem = $$parameters_ref->{directory} . "/" . $$tag_ref;

    for my $pair (@key_suffix_pairs) {
        my ($key, $suffix) = @$pair;
        $$files_ref->{$key} = $stem . $suffix;
    }
    return;
}

# Define the alignment (SAM/BAM/BED) file names of a sample.
#   $reads_ref      - reference to the read-type string
#   $tags_array_ref - reference to the array of per-mate tags
#   $tag_ref        - reference to the sample tag
#   $parameters_ref - reference to the parameters hashref (reads {directory})
#   $files_ref      - reference to the file-name hashref (filled in place)
sub define_alignments_file_names {
    my ($reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref) = @_;

    _define_mate_file_names($reads_ref, $tags_array_ref, $parameters_ref, $files_ref,
        [ 'sam_file_%d_orig', '.fastq.sam' ],
        [ 'sam_file_%d_full', '.sam.full' ],
        [ 'sam_file_%d',      '.sam' ],
        [ 'bam_file_%d',      '.bam' ],
        [ 'bed_file_%d_orig', '.bed.orig' ],
    );
    _define_sample_file_names($tag_ref, $parameters_ref, $files_ref,
        [ 'bed_file_combined', '.combined.bed' ],
    );
    return;
}

# Define the de-duplication file names of a sample.
# Arguments are the same as for define_alignments_file_names.
sub define_de_duplication_file_names {
    my ($reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref) = @_;

    _define_sample_file_names($tag_ref, $parameters_ref, $files_ref,
        [ 'dedup_file', '.dedup_read_ids.dat' ],
    );
    _define_mate_file_names($reads_ref, $tags_array_ref, $parameters_ref, $files_ref,
        [ 'sam_dedup_file_%d',      '.sam.dedup' ],
        [ 'sam_dedup_file_%d_full', '.dedup.sam.full' ],
    );
    return;
}

# Define the on-/off-target file names of a sample, for the raw and the
# de-duplicated data.
# Arguments are the same as for define_alignments_file_names.
sub define_on_and_off_target_file_names {
    my ($reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref) = @_;

    $$files_ref->{target_regions_bed_file} = $$parameters_ref->{directory} . "/target_regions.bed";

    _define_mate_file_names($reads_ref, $tags_array_ref, $parameters_ref, $files_ref,
        # Raw data
        [ 'full_master_prejoin_file_%d',  '.full.prejoin.master.dat' ],
        [ 'full_master_file_%d',          '.full.master.dat' ],
        [ 'bed_file_%d',                  '.bed' ],
        [ 'bed_points_file_%d',           '.points.bed' ],
        [ 'intersect_file_%d',            '.intersect.dat' ],
        [ 'sam_on_target_alone_file_%d',  '.on_target.alone.sam' ],
        [ 'sam_file_%d_marked',           '.sam.marked' ],
        [ 'sam_off_target_alone_file_%d', '.off_target.alone.sam' ],
        [ 'sam_on_target_file_%d',        '.on_target.sam' ],
        [ 'sam_off_target_file_%d',       '.off_target.sam' ],
        # Dedup
        [ 'full_master_dedup_prejoin_file_%d',  '.full.dedup.prejoin.master.dat' ],
        [ 'full_master_dedup_file_%d',          '.full.dedup.master.dat' ],
        [ 'bed_dedup_file_%d',                  '.dedup.bed' ],
        [ 'bed_points_dedup_file_%d',           '.dedup.points.bed' ],
        [ 'intersect_dedup_file_%d',            '.dedup.intersect.dat' ],
        [ 'sam_dedup_on_target_alone_file_%d',  '.dedup.on_target.alone.sam' ],
        [ 'sam_dedup_file_%d_marked',           '.sam.dedup.marked' ],
        [ 'sam_dedup_off_target_alone_file_%d', '.dedup.off_target.alone.sam' ],
        [ 'sam_dedup_on_target_file_%d',        '.dedup.on_target.sam' ],
        [ 'sam_dedup_off_target_file_%d',       '.dedup.off_target.sam' ],
    );
    _define_sample_file_names($tag_ref, $parameters_ref, $files_ref,
        [ 'intersect_file_combined',       '.intersect_combined.dat' ],
        [ 'intersect_dedup_file_combined', '.dedup.intersect_combined.dat' ],
    );
    return;
}

# Define the de-duplicated start-site and coverage bedgraph file names.
#   $reads_ref      - reference to the read-type string
#   $tags_array_ref - reference to the array of per-mate tags
#   $parameters_ref - reference to the parameters hashref (reads {directory})
#   $files_ref      - reference to the file-name hashref (filled in place)
sub define_coverage_and_start_site_file_names {
    my ($reads_ref, $tags_array_ref, $parameters_ref, $files_ref) = @_;

    _define_mate_file_names($reads_ref, $tags_array_ref, $parameters_ref, $files_ref,
        [ 'start_site_dedup_file_%d', '.dedup.start_site.bedgraph' ],
        [ 'coverage_dedup_file_%d',   '.dedup.coverage.bedgraph' ],
    );
    return;
}

# Define the de-duplicated master file names.
# Arguments are the same as for define_coverage_and_start_site_file_names.
sub define_master_files_file_names {
    my ($reads_ref, $tags_array_ref, $parameters_ref, $files_ref) = @_;

    _define_mate_file_names($reads_ref, $tags_array_ref, $parameters_ref, $files_ref,
        [ 'master_dedup_no_annotation_file_%d', '.dedup.no_annotation.master.dat' ],
        [ 'master_dedup_prejoin_file_%d',       '.dedup.prejoin.master.dat' ],
        [ 'master_dedup_file_%d',               '.dedup.master.dat' ],
    );
    return;
}

# Define the per-mate and combined fusion/splice read file names.
# Arguments are the same as for define_alignments_file_names.
sub define_fusion_reads_file_names {
    my ($reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref) = @_;

    _define_mate_file_names($reads_ref, $tags_array_ref, $parameters_ref, $files_ref,
        [ 'one_segment_reads_file_%d',  '.one_segment_reads.dat' ],
        [ 'splice_reads_file_%d',       '.splice_reads.dat' ],
        [ 'fusion_reads_file_%d',       '.fusion_reads.dat' ],
        [ 'multi_fusion_reads_file_%d', '.multi_fusion_reads.dat' ],
    );
    _define_sample_file_names($tag_ref, $parameters_ref, $files_ref,
        [ 'fusion_reads_file', '.fusion_reads.combined.dat' ],
        [ 'splice_reads_file', '.splice_reads.combined.dat' ],
    );
    return;
}

# Define the fusion and splice count file names of a sample.
#   $tag_ref        - reference to the sample tag
#   $parameters_ref - reference to the parameters hashref (reads {directory})
#   $files_ref      - reference to the file-name hashref (filled in place)
sub define_count_fusions_file_names {
    my ($tag_ref, $parameters_ref, $files_ref) = @_;

    _define_sample_file_names($tag_ref, $parameters_ref, $files_ref,
        [ 'fusion_counts_bare_file',                     '.fusion_counts_bare.dat' ],
        [ 'fusion_counts_file',                          '.fusion_counts.dat' ],
        [ 'splice_counts_bare_file',                     '.splice_counts_bare.dat' ],
        [ 'splice_counts_file',                          '.splice_counts.dat' ],
        [ 'fusion_counts_with_splice_bare_file',         '.fusion_counts_with_splice_bare.dat' ],
        [ 'fusion_counts_with_splice_bare_file_machine', '.fusion_counts_with_splice_bare.machine.dat' ],
    );
    return;
}

# Define the flanking sequence file names of a sample.
# Arguments are the same as for define_count_fusions_file_names.
sub define_flanking_sequences_file_names {
    my ($tag_ref, $parameters_ref, $files_ref) = @_;

    _define_sample_file_names($tag_ref, $parameters_ref, $files_ref,
        [ 'flanking_sequences_file',        '.flanking_sequences.dat' ],
        [ 'flanking_splice_sequences_file', '.flanking_splice_sequences.dat' ],
    );
    return;
}

# NOTE: the definition below continues past the end of this section of the
# file, so its opening statements are reproduced exactly as they were.
sub define_consensus_sequences_file_names {
    my $tag_ref = shift;
    my $parameters_ref = shift;
    my $files_ref = shift;
    $$files_ref->{consensus_fusion_std_out_file} = $$parameters_ref->{directory} . "/" .
$$tag_ref . ".consensus_fusion_std_out.dat"; + $$files_ref->{consensus_fusion_std_err_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".consensus_fusion_std_err.dat"; + $$files_ref->{consensus_splice_std_out_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".consensus_splice_std_out.dat"; + $$files_ref->{consensus_splice_std_err_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".consensus_splice_std_err.dat"; + $$files_ref->{fusion_and_splice_consensus_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".fusion_and_splice_consensus_sequences.fasta"; +} + +sub define_bam_dedup_files_file_names { + my $reads_ref = shift; + my $tags_array_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{bam_dedup_file_1} = $$parameters_ref->{directory} . "/" . $$tags_array_ref[0] . ".dedup.bam"; + $$files_ref->{bam_dedup_sorted_file_1_name} = $$parameters_ref->{directory} . "/" . $$tags_array_ref[0] . ".dedup.sorted"; + $$files_ref->{bam_dedup_sorted_file_1} = $$parameters_ref->{directory} . "/" . $$tags_array_ref[0] . ".dedup.sorted.bam"; + if ( $$reads_ref eq 'paired' ) { + $$files_ref->{bam_dedup_file_2} = $$parameters_ref->{directory} . "/" . $$tags_array_ref[1] . ".dedup.bam"; + $$files_ref->{bam_dedup_sorted_file_2_name} = $$parameters_ref->{directory} . "/" . $$tags_array_ref[1] . ".dedup.sorted"; + $$files_ref->{bam_dedup_sorted_file_2} = $$parameters_ref->{directory} . "/" . $$tags_array_ref[1] . ".dedup.sorted.bam"; + } +} + +sub define_sort_sam_files_file_names { + my $reads_ref = shift; + my $tags_array_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{sam_on_target_alone_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".on_target.alone.linux_sorted.sam"; + $$files_ref->{sam_off_target_alone_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . 
".off_target.alone.linux_sorted.sam"; + $$files_ref->{sam_dedup_on_target_alone_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.on_target.alone.linux_sorted.sam"; + $$files_ref->{sam_dedup_off_target_alone_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.off_target.alone.linux_sorted.sam"; + $$files_ref->{sam_on_target_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".on_target.linux_sorted.sam"; + $$files_ref->{sam_off_target_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".off_target.linux_sorted.sam"; + $$files_ref->{sam_dedup_on_target_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.on_target.linux_sorted.sam"; + $$files_ref->{sam_dedup_off_target_file_1_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.off_target.linux_sorted.sam"; + if ( $$reads_ref eq 'paired' ) { + $$files_ref->{sam_on_target_alone_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".on_target.alone.linux_sorted.sam"; + $$files_ref->{sam_off_target_alone_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".off_target.alone.linux_sorted.sam"; + $$files_ref->{sam_dedup_on_target_alone_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".dedup.on_target.alone.linux_sorted.sam"; + $$files_ref->{sam_dedup_off_target_alone_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".dedup.off_target.alone.linux_sorted.sam"; + $$files_ref->{sam_on_target_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".on_target.linux_sorted.sam"; + $$files_ref->{sam_off_target_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . 
".off_target.linux_sorted.sam"; + $$files_ref->{sam_dedup_on_target_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".dedup.on_target.linux_sorted.sam"; + $$files_ref->{sam_dedup_off_target_file_2_linux_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".dedup.off_target.linux_sorted.sam"; + } + + $$files_ref->{bam_file_1_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".bam.full"; + $$files_ref->{bam_file_1_full_prefix} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".bam.full.prefix"; + $$files_ref->{bam_file_1_full_sorted} = $$files_ref->{bam_file_1_full_prefix} . ".bam"; + $$files_ref->{sam_file_1_full_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam.full.sorted"; + if ( $$reads_ref eq 'paired' ) { + $$files_ref->{bam_file_2_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".bam.full"; + $$files_ref->{bam_file_2_full_prefix} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".bam.full.prefix"; + $$files_ref->{bam_file_2_full_sorted} = $$files_ref->{bam_file_2_full_prefix} . ".bam"; + $$files_ref->{sam_file_2_full_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam.full.sorted"; + } + + $$files_ref->{bam_dedup_file_1_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.bam.full"; + $$files_ref->{bam_dedup_file_1_full_prefix} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.bam.full.prefix"; + $$files_ref->{bam_dedup_file_1_full_sorted} = $$files_ref->{bam_dedup_file_1_full_prefix} . ".bam"; + $$files_ref->{sam_dedup_file_1_full_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".dedup.sam.full.sorted"; + if ( $$reads_ref eq 'paired' ) { + $$files_ref->{bam_dedup_file_2_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . 
".dedup.bam.full"; + $$files_ref->{bam_dedup_file_2_full_prefix} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".dedup.bam.full.prefix"; + $$files_ref->{bam_dedup_file_2_full_sorted} = $$files_ref->{bam_dedup_file_2_full_prefix} . ".bam"; + $$files_ref->{sam_dedup_file_2_full_sorted} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".dedup.sam.full.sorted"; + } +} + +sub define_on_target_stats_file_names { + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{on_target_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".on_target.dat"; + $$files_ref->{on_target_dedup_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".dedup.on_target.dat"; + $$files_ref->{reads_per_exon_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".reads_per_exon.dat"; + $$files_ref->{housekeeping_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".housekeeping.dat"; + $$files_ref->{reads_per_exon_dedup_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".dedup.reads_per_exon.dat"; + $$files_ref->{housekeeping_dedup_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".housekeeping.dedup.dat"; + $$files_ref->{reads_per_exon_dedup_file_machine} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".dedup.reads_per_exon.machine.dat"; +} + +sub define_total_molecule_counts_file_names { + my $reads_ref = shift; + my $tags_array_ref = shift; + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{total_and_aligned_molecule_count_file_1} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".total_and_aligned_molecule_count.dat"; + if ( $$reads_ref eq 'paired' ) { + $$files_ref->{total_and_aligned_molecule_count_file_2} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".total_and_aligned_molecule_count.dat"; + } + $$files_ref->{total_and_aligned_molecule_count_per_tag_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . 
".per_tag.total_and_aligned_molecule_count.dat"; +} + +sub define_de_deduplicated_molecule_counts_file_names { + my $reads_ref = shift; + my $tags_array_ref = shift; + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{unique_and_aligned_molecule_count_file_1} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".unique_and_aligned_molecule_count.dat"; + if ( $$reads_ref eq 'paired' ) { + $$files_ref->{unique_and_aligned_molecule_count_file_2} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".unique_and_aligned_molecule_count.dat"; + } + $$files_ref->{unique_and_aligned_molecule_count_per_tag_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".per_tag.unique_and_aligned_molecule_count.dat"; +} + +sub define_all_molecule_counts_file_names { + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{counts_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".counts.dat"; + $$files_ref->{counts_file_machine} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".counts.machine.dat"; +} + + +sub define_qc_check_file_names { + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{qc_filter_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".qc_filter.dat"; + $$files_ref->{qc_filter_file_machine} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".qc_filter.machine.dat"; +} + +sub define_coverage_uniformity_file_names { + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{coverage_uniformity_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".coverage_uniformity.dat"; + $$files_ref->{coverage_uniformity_file_machine} = $$parameters_ref->{directory} . "/" . $$tag_ref . 
".coverage_uniformity.machine.dat"; +} + +sub define_summary_file_names { + my $tag_ref = shift; + my $parameters_ref = shift; + my $files_ref = shift; + $$files_ref->{summary_file} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".summary.dat"; + $$files_ref->{summary_file_ps} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".summary.ps"; + $$files_ref->{summary_file_pdf} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".summary.pdf"; + $$files_ref->{summary_file_machine} = $$parameters_ref->{directory} . "/" . $$tag_ref . ".summary.machine.dat"; +}