comparison bin/SpliceTrap @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
comparison
equal deleted inserted replaced
0:d4ca551ca300 1:adc0f7765d85
1 #!/usr/bin/perl
2 my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
3 # Author: wuj@cshl.edu
4 use strict;
5 use Getopt::Long;
6 ####################
7 use Cwd;
8 my $PROG = $0;
9 my $CUR_DIR = Cwd::abs_path(Cwd::cwd());
10 my $PROG_ABS_PATH = Cwd::abs_path($PROG);
11 #my $SrcFolder=`dirname $PROG_ABS_PATH`;
12 #chomp($SrcFolder);
13 #my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
14 #my $SrcFolder=$config{SrcFolder};
15
16 my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' );
17 foreach my $program (@programs)
18 {
19 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
20
21 }
22
23 ####################
24 my $MapSoftware="bowtie";
25 my $DatabasePrefix="hg18";
26 my $ReadFileFormat="";
27 my $ReadFile1Name="";
28 my $ReadFile2Name="";
29 my $CutoffLevel="M";
30 my $Outputfolder=$CUR_DIR;
31 my $OutputPrefix="Result";
32 #my $CutoffOnly=0;
33 my $ReadSize=36;
34 my $JunctionCut=5;
35 my $onGalaxy="";
36 my $BowtieThreads=1;
37 my $noIRMstr="";
38 my $noIRM = 0;
39
40 GetOptions (
41 "m:s"=>\$MapSoftware,
42 "d:s"=>\$DatabasePrefix,
43 # "f:s"=>\$ReadFileFormat,
44 "1:s"=>\$ReadFile1Name,
45 "2:s"=>\$ReadFile2Name,
46 "c:s"=>\$CutoffLevel,
47 "outdir:s"=>\$Outputfolder,
48 "o:s"=>\$OutputPrefix,
49 "j:i"=>\$JunctionCut,
50 "s:i"=>\$ReadSize,
51 "p:i"=>\$BowtieThreads,
52 "noIRM|noirm"=>\$noIRM,
53 "g:s"=>\$onGalaxy
54 # "local:s"=>\$local,
55 # "rerun"=>\$CutoffOnly
56 );
57 #-O for galaxy output
58
59
60 my $InputParaDes=" Usage of the script:
61 -m Mapping software: [bowtie]/rmap
62 -d Database prefix: [hg18]/mm9/rn4/userdefined
63 -1 Read File 1
64 -2 Read File 2
65 -c Cutoff Level:H/[M]/L
66 Means High, Middle or Low
67 -j Junction reads requirement per junction for each exon-isoform [5]
68 -o Output prefix {Result}
69 -s Read Size [36]
70 --outdir Output folder [./]
71 -p Bowtie parameter, threads number, only use this when you don't use qsub [1]
72 --noIRM Skip the IRM correction step
73
74 This is a quick help, please refer to the README file for details.
75 ";
76
77 if($ReadFile2Name eq "")
78 {
79 $ReadFile2Name = $ReadFile1Name;
80 #trigger singled end mode
81 }
82
83 if($ReadFile1Name eq "" or $ReadFile2Name eq "" )
84 {
85 print $InputParaDes;
86 exit;
87 }
88
89 if($BowtieThreads < 1)
90 {
91 print $InputParaDes;
92 exit;
93 }
94
95 if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel")
96 {
97 print "CHECK: Error, the database you specified is not properly installed.\n";
98 #print $InputParaDes;
99 exit;
100
101 }
102
103 if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L")
104 {
105 print $InputParaDes;
106 exit;
107 }
108
109 $ReadFile1Name = Cwd::abs_path($ReadFile1Name);
110 $ReadFile2Name = Cwd::abs_path($ReadFile2Name);
111
112 #check the files
113 open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n");
114 my $checkoneline = <check>;
115 if(substr($checkoneline,0,1) eq ">")
116 {
117 $ReadFileFormat = "fasta";
118 }
119 elsif(substr($checkoneline,0,1) eq "@")
120 {
121 $ReadFileFormat = "fastq";
122 }
123 else
124 {
125 die("CHECK: ERROR:Please check $ReadFile1Name\n");
126 }
127 close(check);
128
129 open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n");
130 my $checkoneline = <check>;
131 if(substr($checkoneline,0,1) eq ">")
132 {
133 die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta");
134 }
135 elsif(substr($checkoneline,0,1) eq "@")
136 {
137 die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq");
138 }
139 else
140 {
141 die("CHECK: ERROR:Please check $ReadFile2Name\n");
142 }
143 close(check);
144
145 $Outputfolder= Cwd::abs_path($Outputfolder);
146 if($Outputfolder eq "/tmp")
147 {
148 while(-e $Outputfolder)
149 {
150 my $random_foldername = random_sessid();
151 $Outputfolder = "/tmp/".$random_foldername;
152 }
153 }
154
155
156 if(! -e $Outputfolder)
157 {
158 mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n";
159 }
160 if(! -d $Outputfolder)
161 {
162 die "CHECK: $Outputfolder is not a folder\n";
163 }
164
165 if($MapSoftware eq "bowtie")
166 {
167 print "CHECK: whether bowtie installed and in PATH\n";
168 my $bowtiechecker=`bowtie --version`;
169 if($bowtiechecker ne "")
170 {
171 print "CHECK: bowtie found, information below:\n";
172 print $bowtiechecker,"\n";
173 }
174 else
175 {
176 die "CHECK: No bowtie found in PATH, EXIT!\n";
177 }
178 }
179 elsif($MapSoftware eq "rmap")
180 {
181 print "CHECK: checking rmap...\n";
182 if(system("type rmap &>/dev/null") ==0 )
183 {
184 print "CHECK: rmap found, continue\n";
185 }
186 else
187 {
188 die "CHECK: No rmap found in PATH, EXIT!\n";
189 }
190 }
191 else
192 {
193 die "CHECK: option -m only takes rmap or bowtie as inputs\n";
194 }
195
196 if($ReadSize == 0)
197 {
198 die "CHECK: Please check option -s Read size\n";
199 }
200
201 if($noIRM)
202 {
203 $noIRMstr= "noirm";
204 }
205
206 #write more checks later
207 print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n";
208 print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n";
209 print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n";
210 print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name);
211 print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n";
212 print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n";
213 print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n";
214 print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n";
215 print "PARAMETERS:\tRead size: ",$ReadSize,"\n";
216 print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n";
217 print "PARAMETERS:\tNo IRM.\n" if ($noIRM);
218
219 if($MapSoftware eq "bowtie")
220 {
221 print "=================STAGE 1 MAPPING===================\n";
222 system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");
223 system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);
224 print "=================STAGE 2 ESTIMATION================\n";
225
226 system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
227 print "=================STAGE 3 CUTOFF====================\n";
228 system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
229
230
231 }
232
233 if($MapSoftware eq "rmap")
234 {
235 print "=================STAGE 1 MAPPING===================\n";
236
237 system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;
238 system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);
239 print "=================STAGE 2 ESTIMATION================\n";
240
241 system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
242 print "=================STAGE 3 CUTOFF====================\n";
243 system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
244
245
246 }
247
248 print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";
249
250 if($onGalaxy ne "")
251 {
252 system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy");
253 }
254
255 sub random_sessid
256 {
257 #my @chars = (0..9,a..z,A..Z);
258 my @chars = ('a'..'z','A'..'Z');
259 my $len = 10;
260 my $string = join '', map {$chars[rand(@chars)]} (1..$len);
261 return $string;
262 }
263