annotate srst2.pl @ 1:599a4dc309aa draft

planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
author nml
date Wed, 01 Mar 2017 12:39:11 -0500
parents 6f870ed59b6e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6f870ed59b6e Uploaded
nml
parents:
diff changeset
1 #!/usr/bin/env perl
6f870ed59b6e Uploaded
nml
parents:
diff changeset
2
6f870ed59b6e Uploaded
nml
parents:
diff changeset
3
6f870ed59b6e Uploaded
nml
parents:
diff changeset
4 use strict;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
5 use warnings;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
6 use Cwd;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
7 use File::Copy;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
8
1
599a4dc309aa planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
nml
parents: 0
diff changeset
9 #The first 3 arguments should be in this format:
599a4dc309aa planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
nml
parents: 0
diff changeset
10 # bam_output scores_output pileup_output ...
0
6f870ed59b6e Uploaded
nml
parents:
diff changeset
11
6f870ed59b6e Uploaded
nml
parents:
diff changeset
12
6f870ed59b6e Uploaded
nml
parents:
diff changeset
13 my ($bam_results, $scores, $pileup, $job_type, $txt_results, $genes_results, $fullgenes_results, $name, $databases);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
14 my ($allele_results,$allele_type);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
15
6f870ed59b6e Uploaded
nml
parents:
diff changeset
16
6f870ed59b6e Uploaded
nml
parents:
diff changeset
17 $bam_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
18 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
19 $scores = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
20 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
21 $pileup = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
22 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
23
6f870ed59b6e Uploaded
nml
parents:
diff changeset
24 #Now that we shifted the first 4 arguments, get the rest depending on the job type. There are three:
6f870ed59b6e Uploaded
nml
parents:
diff changeset
25 #I pass a letter to tell us which job type was selected.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
26 $job_type = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
27 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
28
6f870ed59b6e Uploaded
nml
parents:
diff changeset
29 #If m, mlst only: we only have one mlst output file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
30 if($job_type eq "m")
6f870ed59b6e Uploaded
nml
parents:
diff changeset
31 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
32 $txt_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
33 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
34 $allele_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
35 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
36 $allele_type = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
37 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
38 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
39 #If g, genedb only: we have two outputs: genes and fullgenes. We also get the name of the database
6f870ed59b6e Uploaded
nml
parents:
diff changeset
40 elsif($job_type eq "g")
6f870ed59b6e Uploaded
nml
parents:
diff changeset
41 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
42 $genes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
43 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
44 $fullgenes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
45 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
46 $databases = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
47 shift (@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
48 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
49 #If b, both mlst and genedb: We will have three output files and the database name.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
50 else
6f870ed59b6e Uploaded
nml
parents:
diff changeset
51 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
52 $txt_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
53 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
54 $genes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
55 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
56 $fullgenes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
57 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
58 $databases = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
59 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
60 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
61
6f870ed59b6e Uploaded
nml
parents:
diff changeset
62 #After we get the output files/database name, now we get the name of the reads
6f870ed59b6e Uploaded
nml
parents:
diff changeset
63 #This allows SRST2 to give meaningful output instead of just printing 'dataset_xxx' as the sample name
6f870ed59b6e Uploaded
nml
parents:
diff changeset
64 my $filename = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
65 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
66
6f870ed59b6e Uploaded
nml
parents:
diff changeset
67 #This index offset is used to determine where the 'genedb' option is located.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
68 #If only a single-end input, the genedb will be 3 positions into the arguments:
6f870ed59b6e Uploaded
nml
parents:
diff changeset
69 # -input_se sample.fastq --genedb database.fasta
6f870ed59b6e Uploaded
nml
parents:
diff changeset
70 my $index_offset = 3;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
71
6f870ed59b6e Uploaded
nml
parents:
diff changeset
72 print @ARGV;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
73 #change the file extensions of the input reads so srst can use them
6f870ed59b6e Uploaded
nml
parents:
diff changeset
74
6f870ed59b6e Uploaded
nml
parents:
diff changeset
75 #Usually the file name looks like this: sample_S1_L001_R1_001.fastq
6f870ed59b6e Uploaded
nml
parents:
diff changeset
76 #If we use this file name, it confuses srst2 a lot. So we just extract
6f870ed59b6e Uploaded
nml
parents:
diff changeset
77 #the sample name to use as input file name.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
78 my @file_name = split /_/, $filename;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
79 $name = "temp_file_name";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
80
6f870ed59b6e Uploaded
nml
parents:
diff changeset
81 my ($for_read, $rev_read, $sing_read, $database);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
82 if ($ARGV[0] eq "--input_pe")
6f870ed59b6e Uploaded
nml
parents:
diff changeset
83 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
84 #Increment index offset if we paired-end inputs
6f870ed59b6e Uploaded
nml
parents:
diff changeset
85 $index_offset++;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
86
6f870ed59b6e Uploaded
nml
parents:
diff changeset
87 $for_read = $name."_1.dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
88 $rev_read = $name."_2.dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
89
6f870ed59b6e Uploaded
nml
parents:
diff changeset
90 symlink($ARGV[1], $for_read);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
91 symlink($ARGV[2], $rev_read);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
92
6f870ed59b6e Uploaded
nml
parents:
diff changeset
93 $ARGV[1] = $for_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
94 $ARGV[2] = $rev_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
95 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
96 else
6f870ed59b6e Uploaded
nml
parents:
diff changeset
97 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
98 $sing_read = $name.".dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
99
6f870ed59b6e Uploaded
nml
parents:
diff changeset
100 symlink($ARGV[1], $sing_read);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
101
6f870ed59b6e Uploaded
nml
parents:
diff changeset
102 $ARGV[1] = $sing_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
103
6f870ed59b6e Uploaded
nml
parents:
diff changeset
104 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
105 #If we are running a job to include genedb, use the database name for input file name
6f870ed59b6e Uploaded
nml
parents:
diff changeset
106 if ($job_type eq 'g' | $job_type eq 'b')
6f870ed59b6e Uploaded
nml
parents:
diff changeset
107 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
108 my @db_names = split /,/, $databases;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
109 my $num_db = @db_names;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
110 my %names_hash = ();
6f870ed59b6e Uploaded
nml
parents:
diff changeset
111 # loop through dbs to replace spaces with _ and check for duplicates
6f870ed59b6e Uploaded
nml
parents:
diff changeset
112 for (my $i = 0; $i < $num_db; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
113 $db_names[$i]=~s/ /_/g;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
114 if( exists($names_hash{$db_names[$i]}) ) {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
115 print STDERR "More than one database with the same name";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
116 exit(1);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
117 }else{
6f870ed59b6e Uploaded
nml
parents:
diff changeset
118 $names_hash{$db_names[$i]}=$db_names[$i];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
119 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
120 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
121
6f870ed59b6e Uploaded
nml
parents:
diff changeset
122
6f870ed59b6e Uploaded
nml
parents:
diff changeset
123 foreach my $db_name (@db_names){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
124 (my $base = $db_name) =~ s/\.[^.]+$//;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
125 $database = $base.".dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
126
6f870ed59b6e Uploaded
nml
parents:
diff changeset
127 symlink($ARGV[$index_offset], $database);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
128 $ARGV[$index_offset] = $database;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
129
6f870ed59b6e Uploaded
nml
parents:
diff changeset
130 $index_offset++;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
131 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
132 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
133
6f870ed59b6e Uploaded
nml
parents:
diff changeset
134 for (my $i =0; $i< @ARGV; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
135 if (index($ARGV[$i], "maxins") != -1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
136 my ($maxins, $minins);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
137 my @b2args = split(' ', $ARGV[$i]);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
138 for (my $j = 0; $j < @b2args; $j++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
139 if (index($b2args[$j], "maxins") != -1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
140 $maxins = $b2args[$j+1];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
141 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
142 if (index($b2args[$j], "minins") != -1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
143 $minins = $b2args[$j+1];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
144 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
145 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
146 if ($maxins - $minins < 0){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
147 print STDERR "--minins cannot be greater than --maxins";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
148 exit(1);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
149 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
150 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
151 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
152
6f870ed59b6e Uploaded
nml
parents:
diff changeset
153
6f870ed59b6e Uploaded
nml
parents:
diff changeset
154
1
599a4dc309aa planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
nml
parents: 0
diff changeset
155 my $command = "srst2 @ARGV";
0
6f870ed59b6e Uploaded
nml
parents:
diff changeset
156
6f870ed59b6e Uploaded
nml
parents:
diff changeset
157 my $exit_code = system($command);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
158
6f870ed59b6e Uploaded
nml
parents:
diff changeset
159 my $cur_dir = getcwd();
6f870ed59b6e Uploaded
nml
parents:
diff changeset
160 # make arrays for using multiple custom databases (creates multiple output files - need to be concatenated)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
161 my (@genefiles, @bamfiles, @pileupfiles, @fullgenefiles, @scoresfiles);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
162
6f870ed59b6e Uploaded
nml
parents:
diff changeset
163 # go through files in the output directory to move/concatenate them as required.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
164
6f870ed59b6e Uploaded
nml
parents:
diff changeset
165 foreach my $file (<$cur_dir/*>)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
166 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
167 print $file, "\n";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
168 #Will cause problems if any files have 'mlst' in them
6f870ed59b6e Uploaded
nml
parents:
diff changeset
169 if ($file =~ /mlst/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
170 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
171 move($file, $txt_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
172 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
173 elsif ($file =~ /\.bam$/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
174 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
175 push @bamfiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
176 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
177 elsif ($file =~ /\.scores$/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
178 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
179 push @scoresfiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
180 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
181 elsif ($file =~ /\.pileup$/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
182 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
183 push @pileupfiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
184 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
185 elsif ($file =~ /__fullgenes__/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
186 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
187 push @fullgenefiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
188 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
189 elsif ($file =~ /__genes__/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
190 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
191 push @genefiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
192 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
193 elsif ($file =~ /all_consensus_alleles.fasta$/ && $allele_type eq 'all') {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
194 move($file,$allele_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
195 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
196 elsif ($file =~ /new_consensus_alleles.fasta$/ && $allele_type eq 'new'){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
197 move($file,$allele_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
198 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
199 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
200
6f870ed59b6e Uploaded
nml
parents:
diff changeset
201
6f870ed59b6e Uploaded
nml
parents:
diff changeset
202 my ($cmd, $temp_head, $temp_full, $cat_header, $final_bam, @headers );
6f870ed59b6e Uploaded
nml
parents:
diff changeset
203
6f870ed59b6e Uploaded
nml
parents:
diff changeset
204 # create new concatenated bam file with all bam files.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
205 if (@bamfiles > 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
206 my $counter = 0;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
207 $cat_header = "cat_header";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
208 while ($counter < @bamfiles) {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
209 $headers[$counter] = "bam_header".$counter;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
210 # make a header file for each bam results file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
211 my $cmd = "samtools view -H $bamfiles[$counter] > $headers[$counter]";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
212 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
213 if ($counter >= 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
214 # only keep the @hd and @pg from first file because the final concatenated file can only have one of each (doesn't matter location)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
215 $temp_head="cut_head".$counter;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
216 # cut off first row and last row of each file (the @HD and @PG)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
217 $cmd = "tail -n +2 $headers[$counter] | head -n -1 > $temp_head";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
218 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
219 unlink $headers[$counter];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
220 # replace the old header with the new cut header in the array
6f870ed59b6e Uploaded
nml
parents:
diff changeset
221 $headers[$counter] = $temp_head;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
222 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
223 $counter++;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
224 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
225 # combine all header files
6f870ed59b6e Uploaded
nml
parents:
diff changeset
226 $cmd = "cat ".join(" ",@headers)." > $cat_header";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
227 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
228
6f870ed59b6e Uploaded
nml
parents:
diff changeset
229 $final_bam = "final_bam";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
230 # concatenate all the bam files *must include the concatenated header file created above
6f870ed59b6e Uploaded
nml
parents:
diff changeset
231 $cmd = "samtools cat -h $cat_header -o $final_bam ".join(" ",@bamfiles)." ";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
232 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
233
6f870ed59b6e Uploaded
nml
parents:
diff changeset
234 # sort the bam file so it can be indexed
6f870ed59b6e Uploaded
nml
parents:
diff changeset
235 $cmd = "samtools sort $final_bam 'last_bam'";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
236 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
237
6f870ed59b6e Uploaded
nml
parents:
diff changeset
238 # move bam file to where Galaxy expects it.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
239 $cmd = "mv 'last_bam.bam' $bam_results";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
240 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
241 } else {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
242 # only one bam file, don't need to concatenate
6f870ed59b6e Uploaded
nml
parents:
diff changeset
243 move($bamfiles[0], $bam_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
244 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
245
6f870ed59b6e Uploaded
nml
parents:
diff changeset
246 # concatenate all pileup files
6f870ed59b6e Uploaded
nml
parents:
diff changeset
247 if (@pileupfiles > 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
248 $cmd = "cat ".join(" ",@pileupfiles)." > $pileup";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
249 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
250 } else {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
251 move($pileupfiles[0], $pileup);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
252 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
253
6f870ed59b6e Uploaded
nml
parents:
diff changeset
254 # perform find-replace to restore original user-specified file names
6f870ed59b6e Uploaded
nml
parents:
diff changeset
255 foreach my $gene (@genefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
256 my $data = read_file($gene);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
257 $data =~ s/temp_file_name/$file_name[0]/g;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
258 write_file($gene, $data);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
259 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
260
6f870ed59b6e Uploaded
nml
parents:
diff changeset
261 foreach my $gene (@fullgenefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
262 my $data = read_file($gene);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
263 $data =~ s/temp_file_name/$file_name[0]/g;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
264 write_file($gene, $data);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
265 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
266
6f870ed59b6e Uploaded
nml
parents:
diff changeset
267 # concatenate gene files with a space separating each file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
268 if (@genefiles > 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
269 my $join = join(" <(echo) ", @genefiles);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
270 my @args = ("bash", "-c", "cat $join > $genes_results");
6f870ed59b6e Uploaded
nml
parents:
diff changeset
271 system(@args);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
272 } else {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
273 # only one gene results file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
274 move($genefiles[0], $genes_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
275 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
276
6f870ed59b6e Uploaded
nml
parents:
diff changeset
277 # concatenate full gene results files but only keep header in first file.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
278 if (@fullgenefiles >1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
279 for (my $i= 1; $i < @fullgenefiles; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
280 # go through all files but the first one to remove headers
6f870ed59b6e Uploaded
nml
parents:
diff changeset
281 # create a temp file to save the file after header is removed
6f870ed59b6e Uploaded
nml
parents:
diff changeset
282 $temp_full = "temp_full".$i;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
283 $cmd = "tail -n +2 $fullgenefiles[$i] > $temp_full";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
284 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
285 unlink $fullgenefiles[$i];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
286 $fullgenefiles[$i] = $temp_full;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
287 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
288 $cmd = "cat ". join(" ",@fullgenefiles)." > $fullgenes_results";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
289 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
290 } else{
6f870ed59b6e Uploaded
nml
parents:
diff changeset
291 # only one full gene results file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
292 move($fullgenefiles[0], $fullgenes_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
293 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
294
6f870ed59b6e Uploaded
nml
parents:
diff changeset
295 # concatenate full gene results files but only keep header in first file.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
296 if (@scoresfiles >1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
297 for (my $i= 1; $i < @scoresfiles; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
298 # go through all files but the first one to remove headers
6f870ed59b6e Uploaded
nml
parents:
diff changeset
299 # create a temp file to save the file after header is removed
6f870ed59b6e Uploaded
nml
parents:
diff changeset
300 $temp_full = "temp_full".$i;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
301 $cmd = "tail -n +2 $scoresfiles[$i] > $temp_full";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
302 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
303 unlink $scoresfiles[$i];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
304 $scoresfiles[$i] = $temp_full;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
305 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
306 $cmd = "cat ". join(" ",@scoresfiles)." > $scores";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
307 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
308 } else{
6f870ed59b6e Uploaded
nml
parents:
diff changeset
309 # only one scores file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
310 move($scoresfiles[0], $scores);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
311 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
312
6f870ed59b6e Uploaded
nml
parents:
diff changeset
313 # cleanup srst2 output and temp files
6f870ed59b6e Uploaded
nml
parents:
diff changeset
314 foreach my $file (@fullgenefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
315 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
316 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
317 foreach my $file (@genefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
318 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
319 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
320 foreach my $file (@pileupfiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
321 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
322 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
323 foreach my $file (@bamfiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
324 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
325 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
326 foreach my $file (@headers){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
327 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
328 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
329 foreach my $file (@scoresfiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
330 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
331 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
332 unlink $temp_head;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
333 unlink $temp_full;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
334 unlink $cat_header;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
335 unlink $final_bam;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
336
6f870ed59b6e Uploaded
nml
parents:
diff changeset
337 #get rid of symlinks
6f870ed59b6e Uploaded
nml
parents:
diff changeset
338 if ($for_read)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
339 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
340 unlink $for_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
341 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
342 if ($rev_read)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
343 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
344 unlink $rev_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
345 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
346 if ($sing_read)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
347 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
348 unlink $sing_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
349 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
350 if ($database)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
351 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
352 unlink $database;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
353 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
354 $exit_code = $exit_code >> 8;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
355 exit $exit_code;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
356
6f870ed59b6e Uploaded
nml
parents:
diff changeset
357 sub read_file {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
358 my ($filename) = @_;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
359
6f870ed59b6e Uploaded
nml
parents:
diff changeset
360 open my $in, '<:encoding(UTF-8)', $filename or die "Could not open '$filename' for reading $!";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
361 local $/ = undef;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
362 my $all = <$in>;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
363 close $in;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
364
6f870ed59b6e Uploaded
nml
parents:
diff changeset
365 return $all;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
366 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
367
6f870ed59b6e Uploaded
nml
parents:
diff changeset
368 sub write_file {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
369 my ($filename, $content) = @_;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
370
6f870ed59b6e Uploaded
nml
parents:
diff changeset
371 open my $out, '>:encoding(UTF-8)', $filename or die "Could not open '$filename' for writing $!";;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
372 print $out $content;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
373 close $out;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
374
6f870ed59b6e Uploaded
nml
parents:
diff changeset
375 return;
1
599a4dc309aa planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
nml
parents: 0
diff changeset
376 }