0
|
1 #!/usr/bin/perl
|
|
2
|
|
3
|
|
4 use Data::Dumper;
|
|
5 use Getopt::Long;
|
|
6 use Pod::Usage;
|
|
7
|
|
8
|
|
9
|
|
10 #pod2usage(-verbose => 1) if ($help == 1);
|
|
11 #if (@ARGV == 0) {
|
|
12 # pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2);
|
|
13 #}
|
|
14
|
|
# Installation directory of the RSEM release whose executables are invoked.
my $rsem_version = "/opt/rsem-1.1.17";

# Defaults for options that are range-checked or interpolated into the RSEM
# command even when the caller omits them.  Without a default, leaving out
# -p fails the "at least 1 thread" check and leaving out --forward-prob
# produces a dangling "--forward-prob" flag.  The values for $nThreads and
# $probF follow the defaults documented for rsem-calculate-expression.
my $minL     = 1;       # --fragment-length-min
my $maxL     = 1000;    # --fragment-length-max
my $NMB      = 1024;    # --ci-memory (MB)
my $nThreads = 1;       # -p / --num-threads
my $probF    = 0.5;     # --forward-prob

# Extra file output #beta
#   --isoformfile $isoforms
#   --thetafile   $theta
#   --cntfile     $cnt
#   --modelfile   $model
#   --bamfile     $bam_res

# Targets filled in by GetOptions.  They are declared lexically so the rest
# of the script does not depend on implicit package globals; every one of
# them stays undef unless the corresponding option is given.
my ($log, $output, $dbref);
my ($bam_genome, $bamtype, $bamfile, $samplingbam, $genBamF);
my ($isoforms, $theta, $cnt, $model);
my ($single_fasta, $fasta1, $fasta2, $single_fastq, $fastq1, $fastq2);
my ($no_qual, $paired_end, $is_sam, $is_bam, $fn_list);
my ($tagName, $L, $bowtie_path, $C, $E, $maxHits);
my ($phred33, $phred64, $solexa);
my ($mean, $sd, $estRSPD, $B, $calcCI, $mTime, $quiet);

# Parse the command line; on an unknown or malformed option print the full
# POD usage and exit with status 2.
GetOptions(
    # Galaxy dataset paths
    "log=s"                  => \$log,
    "bam_genome=s"           => \$bam_genome,
    "bamtype=s"              => \$bamtype,
    "isoformfile=s"          => \$isoforms,
    "reference=s"            => \$dbref,
    "sampling-for-bam=s"     => \$samplingbam,
    "thetafile=s"            => \$theta,
    "cntfile=s"              => \$cnt,
    "modelfile=s"            => \$model,
    "bamfile=s"              => \$bamfile,
    "output=s"               => \$output,

    # Read inputs
    "single_fasta=s"         => \$single_fasta,
    "fasta1=s"               => \$fasta1,
    "fasta2=s"               => \$fasta2,
    "single_fastq=s"         => \$single_fastq,
    "fastq1=s"               => \$fastq1,
    "fastq2=s"               => \$fastq2,
    "no-qualities"           => \$no_qual,
    "paired-end"             => \$paired_end,
    "sam"                    => \$is_sam,
    "bam"                    => \$is_bam,
    "sam-header-info=s"      => \$fn_list,

    # Alignment / model parameters
    "tag=s"                  => \$tagName,
    "seed-length=i"          => \$L,
    "bowtie-path=s"          => \$bowtie_path,
    "bowtie-n=i"             => \$C,
    "bowtie-e=i"             => \$E,
    "bowtie-m=i"             => \$maxHits,
    "phred33-quals"          => \$phred33,
    "phred64-quals"          => \$phred64,
    "solexa-quals"           => \$solexa,
    "forward-prob=f"         => \$probF,
    "fragment-length-min=i"  => \$minL,
    "fragment-length-max=i"  => \$maxL,
    "fragment-length-mean=f" => \$mean,
    "fragment-length-sd=f"   => \$sd,
    "estimate-rspd=s"        => \$estRSPD,
    "num-rspd-bins=i"        => \$B,

    # Run-time behaviour
    "p|num-threads=i"        => \$nThreads,
    "output-genome-bam"      => \$genBamF,
    "calc-ci=s"              => \$calcCI,
    "ci-memory=i"            => \$NMB,
    "time"                   => \$mTime,
    "q|quiet"                => \$quiet,
) or pod2usage( -exitval => 2, -verbose => 2 );
|
|
73
|
|
#check parameters and options

# SAM/BAM input mode: the bowtie and quality-encoding options make no sense
# when alignments are supplied directly.
if ($is_sam || $is_bam) {
    pod2usage(-msg => "from rsem-wrapper->Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 4);
    pod2usage(-msg => "--sam and --bam cannot be active at the same time!", -exitval => 2, -verbose => 2) if ($is_sam && $is_bam);

    # The bowtie options have no defaults in this wrapper, so an option that
    # was not given is undef.  Only complain when one was actually supplied
    # with a value other than the default the comparison expects; comparing
    # undef directly would flag every SAM/BAM run.
    my $bowtie_option_set =
           (defined $bowtie_path && $bowtie_path ne "")
        || (defined $C           && $C != 2)
        || (defined $E           && $E != 99999999)
        || (defined $maxHits     && $maxHits != 200)
        || $phred33 || $phred64 || $solexa;
    pod2usage(-msg => "--bowtie-path, --bowtie-n, --bowtie-e, --bowtie-m, --phred33-quals, --phred64-quals or --solexa-quals cannot be set if input is SAM/BAM format!", -exitval => 2, -verbose => 2) if ($bowtie_option_set);
}
# Disabled checks for the bowtie-aligner case, kept for reference:
#else {
#    pod2usage(-msg => "from rsem-wrapper->Invalid number of arguments!", -exitval => 2, -verbose => 2)
#        if (!$paired_end && scalar(@ARGV) != 1 || $paired_end && scalar(@ARGV) != 1);
#    pod2usage(-msg => "Only one of --phred33-quals --phred64-quals/--solexa1.3-quals --solexa-quals can be active!", -exitval => 2, -verbose => 2) if ($phred33 + $phred64 + $solexa > 1);
#    pod2usage(-msg => "--sam , --bam or --sam-header-info cannot be set if use bowtie aligner to produce alignments!", -exitval => 2, -verbose => 2) if ($is_sam || $is_bam || $fn_list ne "");
#}

# Range checks on the numeric options.  An omitted --forward-prob is accepted
# here (exactly as an undefined value compared as 0 was before); an omitted
# thread count is rejected.
pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if (defined $probF && ($probF < 0 || $probF > 1));
pod2usage(-msg => "Min fragment length should be at least 1!", -exitval => 2, -verbose => 2) if ($minL < 1);
pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if (!defined $nThreads || $nThreads < 1);
|
|
93
|
|
# IO redirection to log file
#
# Everything this script and the programs it launches write to STDOUT or
# STDERR ends up in the file given with --log.  The log is opened (and
# truncated) exactly once and the second handle is a dup of the first, so both
# streams share a single file offset.  Opening the file twice ('>' and '>>')
# gives each stream its own offset, and stdout output can then overwrite text
# that stderr has already appended.
use IO::Handle;
die "cant open file: no --log file was given\n"
    unless (defined $log && $log ne "");
open OUTPUT, '>', $log or die "cant open file $log $!\n";
open ERROR, '>&', \*OUTPUT or die "cant open file $log $!\n";
STDOUT->fdopen( \*OUTPUT, 'w' ) or die "cant open file $!\n";
STDERR->fdopen( \*ERROR, 'w' ) or die "cant open file $!\n";
#
|
|
101
|
|
# Optional rsem-calculate-expression arguments derived from the parsed
# wrapper options.  Each element is one "--flag" or "--flag value" string; the
# list is joined with single spaces into $options further down.
my @options;

# --output-genome-bam generates new output called sample_name.genome.bam
# with alignments mapped to genomic coordinates and annotated with their
# posterior probabilities. In addition, RSEM will call samtools (included in
# the RSEM package) to sort and index the bam file.
# 'sample_name.genome.sorted.bam' and
# 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off)
push @options, "--output-genome-bam"
    if (defined $bamtype && $bamtype eq "yes");

# The Galaxy form passes these switches as the literal string "yes".
push @options, "--sampling-for-bam"
    if (defined $samplingbam && $samplingbam eq "yes");
push @options, "--estimate-rspd"
    if (defined $estRSPD && $estRSPD eq "yes");

# Only emit --forward-prob when a value is available; an unset value would
# leave a bare flag that swallows the next argument.  $probF itself is left
# untouched so it keeps its numeric value.
push @options, "--forward-prob $probF"
    if (defined $probF && $probF ne "");

if (defined $calcCI && $calcCI eq "yes") {
    push @options, "--calc-ci";
    push @options, "--ci-memory $NMB";
}

push @options, "--tag $tagName"   if ($tagName);
push @options, "--seed-length $L" if ($L);

# 0 is a legal value for --bowtie-n (no mismatches allowed in the seed), so
# test for definedness; a truthiness test would silently drop it and fall
# back to RSEM's own default.
push @options, "--bowtie-n $C"       if (defined $C);
push @options, "--bowtie-e $E"       if ($E);
push @options, "--bowtie-m $maxHits" if ($maxHits);

# Fragment length limits are forwarded only when they differ from the
# wrapper defaults (1 and 1000).
push @options, "--fragment-length-min $minL" if ($minL != 1);
push @options, "--fragment-length-max $maxL" if ($maxL != 1000);
push @options, "--fragment-length-mean $mean" if ($mean);
push @options, "--fragment-length-sd $sd"     if ($sd);

my $options = join(" ", @options);
|
|
170
|
|
#BUILD COMMAND BASED ON PARSED OPTIONS
#
# Assembles one rsem-calculate-expression invocation for the four supported
# input layouts (FASTA or FASTQ reads, single or paired end), echoes it to the
# log and runs it.  The command is executed as an argument list, so file names
# are never re-interpreted by a shell, and a failing RSEM run aborts the
# script with a non-zero exit status instead of falling through to the
# renaming step.
{
    my @reads;
    my $fastqtype;
    if ($no_qual) {
        # reads are in fasta file format
        @reads = $paired_end ? ($fasta1, $fasta2) : ($single_fasta);
    }
    else {
        # reads are in fastq file format
        @reads = $paired_end ? ($fastq1, $fastq2) : ($single_fastq);

        # type of fastq file?  No flag is passed when none was selected.
        if ($phred33) {
            $fastqtype = "--phred33-quals";
        }
        elsif ($phred64) {
            $fastqtype = "--phred64-quals";
        }
        elsif ($solexa) {
            $fastqtype = "--solexa-quals";
        }
    }

    # Refuse to run with holes in the positional arguments; RSEM would
    # otherwise misread the remaining ones.
    my @missing = grep { !defined $_ || $_ eq "" } (@reads, $dbref, $output);
    die "Missing read file(s), --reference or --output; cannot run RSEM\n"
        if (@missing);

    my @cmd = ("$rsem_version/rsem-calculate-expression", "--quiet");
    push @cmd, "--no-qualities" if ($no_qual);
    push @cmd, "--paired-end"   if ($paired_end);
    push @cmd, "-p", $nThreads;
    # $options is a space-joined string of "--flag value" items; split it
    # back into words exactly as the shell would have done.
    push @cmd, split(' ', $options) if (defined $options);
    push @cmd, $fastqtype if (defined $fastqtype);
    push @cmd, @reads, $dbref, $output;

    my $cmd = join(" ", @cmd);
    print "RSEM Parameters used by Galaxy:\n$cmd\n";

    system(@cmd);
    if ($? == -1) {
        die "Failed to execute $cmd[0]: $!\n";
    }
    elsif ($? & 127) {
        die sprintf("rsem-calculate-expression died with signal %d\n", $? & 127);
    }
    elsif ($? != 0) {
        die sprintf("rsem-calculate-expression exited with status %d\n", $? >> 8);
    }
}
|
|
212
|
|
#Rename files for galaxy
#
# RSEM names its result files after the $output prefix; Galaxy expects each
# result at the dataset path it handed to this wrapper.  Every move is run
# through mv as an argument list (no shell involved).  A move whose
# destination option was not supplied is skipped, and a failing move is
# reported in the log; neither aborts the remaining moves.

# Last path component of $output; only needed for the files inside the
# "$output.stat" directory, whose moves are currently disabled.
my @rsem_dir = split(/\//, $output);
my $short_output = $rsem_dir[-1];

my @moves;

#print "bamtype-parameter=$bamtype\n";
# The genome BAM only exists when --output-genome-bam was requested.
push @moves, [ "$output.genome.sorted.bam", $bam_genome ]
    if (defined $bamtype && $bamtype eq "yes");

push @moves,
    [ "$output.genes.results",         $output   ],
    [ "$output.isoforms.results",      $isoforms ],
    [ "$output.transcript.sorted.bam", $bamfile  ];

# Disabled (beta) outputs:
#push @moves,
#    [ "$output.stat/$short_output.theta", $theta ],
#    [ "$output.stat/$short_output.cnt",   $cnt   ],
#    [ "$output.stat/$short_output.model", $model ];

for my $move (@moves) {
    my ($from, $to) = @$move;
    unless (defined $to && $to ne "") {
        warn "Not moving $from: no destination was given\n";
        next;
    }
    system("mv", $from, $to) == 0
        or warn "Could not move $from to $to (wait status $?)\n";
}
#print "LOG $mv\n";
|