1
|
1 # Author: wuj@cshl.edu
|
|
2 use strict;
|
|
3 use Getopt::Long;
|
|
4 ####################
|
|
5 use Cwd;
|
|
6 my $PROG = $0;
|
|
7 my $CUR_DIR = Cwd::abs_path(Cwd::cwd());
|
|
8 my $PROG_ABS_PATH = Cwd::abs_path($PROG);
|
|
9 #my $SrcFolder=`dirname $PROG_ABS_PATH`;
|
|
10 #chomp($SrcFolder);
|
|
11 #my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
|
|
12 #my $SrcFolder=$config{SrcFolder};
|
|
13
|
|
14 my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' );
|
|
15 foreach my $program (@programs)
|
|
16 {
|
|
17 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
|
|
18
|
|
19 }
|
|
20
|
|
21 ####################
|
|
22 my $MapSoftware="bowtie";
|
|
23 my $DatabasePrefix="hg18";
|
|
24 my $ReadFileFormat="";
|
|
25 my $ReadFile1Name="";
|
|
26 my $ReadFile2Name="";
|
|
27 my $CutoffLevel="M";
|
|
28 my $Outputfolder=$CUR_DIR;
|
|
29 my $OutputPrefix="Result";
|
|
30 #my $CutoffOnly=0;
|
|
31 my $ReadSize=36;
|
|
32 my $JunctionCut=5;
|
|
33 my $onGalaxy="";
|
|
34 my $BowtieThreads=1;
|
|
35 my $noIRMstr="";
|
|
36 my $noIRM = 0;
|
|
37
|
|
38 GetOptions (
|
|
39 "m:s"=>\$MapSoftware,
|
|
40 "d:s"=>\$DatabasePrefix,
|
|
41 # "f:s"=>\$ReadFileFormat,
|
|
42 "1:s"=>\$ReadFile1Name,
|
|
43 "2:s"=>\$ReadFile2Name,
|
|
44 "c:s"=>\$CutoffLevel,
|
|
45 "outdir:s"=>\$Outputfolder,
|
|
46 "o:s"=>\$OutputPrefix,
|
|
47 "j:i"=>\$JunctionCut,
|
|
48 "s:i"=>\$ReadSize,
|
|
49 "p:i"=>\$BowtieThreads,
|
|
50 "noIRM|noirm"=>\$noIRM,
|
|
51 "g:s"=>\$onGalaxy
|
|
52 # "local:s"=>\$local,
|
|
53 # "rerun"=>\$CutoffOnly
|
|
54 );
|
|
55 #-O for galaxy output
|
|
56
|
|
57
|
|
58 my $InputParaDes=" Usage of the script:
|
|
59 -m Mapping software: [bowtie]/rmap
|
|
60 -d Database prefix: [hg18]/mm9/rn4/userdefined
|
|
61 -1 Read File 1
|
|
62 -2 Read File 2
|
|
63 -c Cutoff Level:H/[M]/L
|
|
64 Means High, Middle or Low
|
|
65 -j Junction reads requirement per junction for each exon-isoform [5]
|
|
66 -o Output prefix {Result}
|
|
67 -s Read Size [36]
|
|
68 --outdir Output folder [./]
|
|
69 -p Bowtie parameter, threads number, only use this when you don't use qsub [1]
|
|
70 --noIRM Skip the IRM correction step
|
|
71
|
|
72 This is a quick help, please refer to the README file for details.
|
|
73 ";
|
|
74
|
|
75 if($ReadFile2Name eq "")
|
|
76 {
|
|
77 $ReadFile2Name = $ReadFile1Name;
|
|
78 #trigger singled end mode
|
|
79 }
|
|
80
|
|
81 if($ReadFile1Name eq "" or $ReadFile2Name eq "" )
|
|
82 {
|
|
83 print $InputParaDes;
|
|
84 exit;
|
|
85 }
|
|
86
|
|
87 if($BowtieThreads < 1)
|
|
88 {
|
|
89 print $InputParaDes;
|
|
90 exit;
|
|
91 }
|
|
92
|
|
93 if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel")
|
|
94 {
|
|
95 print "CHECK: Error, the database you specified is not properly installed.\n";
|
|
96 #print $InputParaDes;
|
|
97 exit;
|
|
98
|
|
99 }
|
|
100
|
|
101 if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L")
|
|
102 {
|
|
103 print $InputParaDes;
|
|
104 exit;
|
|
105 }
|
|
106
|
|
107 $ReadFile1Name = Cwd::abs_path($ReadFile1Name);
|
|
108 $ReadFile2Name = Cwd::abs_path($ReadFile2Name);
|
|
109
|
|
110 #check the files
|
|
111 open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n");
|
|
112 my $checkoneline = <check>;
|
|
113 if(substr($checkoneline,0,1) eq ">")
|
|
114 {
|
|
115 $ReadFileFormat = "fasta";
|
|
116 }
|
|
117 elsif(substr($checkoneline,0,1) eq "@")
|
|
118 {
|
|
119 $ReadFileFormat = "fastq";
|
|
120 }
|
|
121 else
|
|
122 {
|
|
123 die("CHECK: ERROR:Please check $ReadFile1Name\n");
|
|
124 }
|
|
125 close(check);
|
|
126
|
|
127 open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n");
|
|
128 my $checkoneline = <check>;
|
|
129 if(substr($checkoneline,0,1) eq ">")
|
|
130 {
|
|
131 die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta");
|
|
132 }
|
|
133 elsif(substr($checkoneline,0,1) eq "@")
|
|
134 {
|
|
135 die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq");
|
|
136 }
|
|
137 else
|
|
138 {
|
|
139 die("CHECK: ERROR:Please check $ReadFile2Name\n");
|
|
140 }
|
|
141 close(check);
|
|
142
|
|
143 $Outputfolder= Cwd::abs_path($Outputfolder);
|
|
144 if($Outputfolder eq "/tmp")
|
|
145 {
|
|
146 while(-e $Outputfolder)
|
|
147 {
|
|
148 my $random_foldername = random_sessid();
|
|
149 $Outputfolder = "/tmp/".$random_foldername;
|
|
150 }
|
|
151 }
|
|
152
|
|
153
|
|
154 if(! -e $Outputfolder)
|
|
155 {
|
|
156 mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n";
|
|
157 }
|
|
158 if(! -d $Outputfolder)
|
|
159 {
|
|
160 die "CHECK: $Outputfolder is not a folder\n";
|
|
161 }
|
|
162
|
|
163 if($MapSoftware eq "bowtie")
|
|
164 {
|
|
165 print "CHECK: whether bowtie installed and in PATH\n";
|
|
166 my $bowtiechecker=`bowtie --version`;
|
|
167 if($bowtiechecker ne "")
|
|
168 {
|
|
169 print "CHECK: bowtie found, information below:\n";
|
|
170 print $bowtiechecker,"\n";
|
|
171 }
|
|
172 else
|
|
173 {
|
|
174 die "CHECK: No bowtie found in PATH, EXIT!\n";
|
|
175 }
|
|
176 }
|
|
177 elsif($MapSoftware eq "rmap")
|
|
178 {
|
|
179 print "CHECK: checking rmap...\n";
|
|
180 if(system("type rmap &>/dev/null") ==0 )
|
|
181 {
|
|
182 print "CHECK: rmap found, continue\n";
|
|
183 }
|
|
184 else
|
|
185 {
|
|
186 die "CHECK: No rmap found in PATH, EXIT!\n";
|
|
187 }
|
|
188 }
|
|
189 else
|
|
190 {
|
|
191 die "CHECK: option -m only takes rmap or bowtie as inputs\n";
|
|
192 }
|
|
193
|
|
194 if($ReadSize == 0)
|
|
195 {
|
|
196 die "CHECK: Please check option -s Read size\n";
|
|
197 }
|
|
198
|
|
199 if($noIRM)
|
|
200 {
|
|
201 $noIRMstr= "noirm";
|
|
202 }
|
|
203
|
|
204 #write more checks later
|
|
205 print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n";
|
|
206 print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n";
|
|
207 print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n";
|
|
208 print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name);
|
|
209 print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n";
|
|
210 print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n";
|
|
211 print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n";
|
|
212 print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n";
|
|
213 print "PARAMETERS:\tRead size: ",$ReadSize,"\n";
|
|
214 print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n";
|
|
215 print "PARAMETERS:\tNo IRM.\n" if ($noIRM);
|
|
216
|
|
217 if($MapSoftware eq "bowtie")
|
|
218 {
|
|
219 print "=================STAGE 1 MAPPING===================\n";
|
|
220 system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");
|
|
221 system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);
|
|
222 print "=================STAGE 2 ESTIMATION================\n";
|
|
223
|
|
224 system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
|
|
225 print "=================STAGE 3 CUTOFF====================\n";
|
|
226 system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
|
|
227
|
|
228
|
|
229 }
|
|
230
|
|
231 if($MapSoftware eq "rmap")
|
|
232 {
|
|
233 print "=================STAGE 1 MAPPING===================\n";
|
|
234
|
|
235 system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;
|
|
236 system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);
|
|
237 print "=================STAGE 2 ESTIMATION================\n";
|
|
238
|
|
239 system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
|
|
240 print "=================STAGE 3 CUTOFF====================\n";
|
|
241 system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
|
|
242
|
|
243
|
|
244 }
|
|
245
|
|
246 print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";
|
|
247
|
|
248 if($onGalaxy ne "")
|
|
249 {
|
|
250 system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy");
|
|
251 }
|
|
252
|
|
253 sub random_sessid
|
|
254 {
|
|
255 #my @chars = (0..9,a..z,A..Z);
|
|
256 my @chars = ('a'..'z','A'..'Z');
|
|
257 my $len = 10;
|
|
258 my $string = join '', map {$chars[rand(@chars)]} (1..$len);
|
|
259 return $string;
|
|
260 }
|
|
261
|