comparison ALFA/alfa_wrapper.sh @ 18:a1e2ab10b317 draft

Uploaded
author charles-bernard
date Tue, 11 Oct 2016 09:18:48 -0400
parents
children 1239f24962d8
comparison
equal deleted inserted replaced
17:e3d439570972 18:a1e2ab10b317
#!/usr/bin/env bash

#########################################################################################################
# ARGUMENTS FROM alfa_wrapper.xml #
#########################################################################################################
# $1 - path of the ALFA configuration file (rewritten in place below)
# $2 - path of the log report (truncated, then filled by this script)
configFile=$1
logReport=$2

#######################################
# Clean the configuration file and start a fresh log report that embeds it.
# The configuration file is edited in place: empty lines are deleted and
# every tab character is removed. The report is truncated, then receives the
# "ALFA CONFIG" banner followed by the cleaned configuration.
# Arguments: $1 - configuration file, $2 - log report
# Outputs:   rewrites $1, (re)creates $2
#######################################
start_log_report() {
  local config=$1
  local report=$2

  # Paths are quoted so that file names containing spaces or glob characters
  # reach sed/cat (and the redirection) as a single word.
  sed -i -e '/^$/d; s/\t//g;' -- "$config"
  {
    printf "__________________________________________________________________\n\n"
    printf " ALFA CONFIG \n"
    printf "__________________________________________________________________\n"
    cat -- "$config"
  } > "$report"
}

start_log_report "$configFile" "$logReport"

#########################################################################################################
# INITIALIZATION OF THE VARIABLES from $configFile #
#########################################################################################################

#######################################
# Print the value(s) stored under a key of the configuration file.
# A configuration line has the form "key=value" or "key = value". Everything
# after the first "=" (and one optional following space) is the value, so a
# value may itself contain "=" characters.
# Globals:   configFile (read, used when $2 is omitted)
# Arguments: $1 - key, as an extended regular expression
#            $2 - configuration file (optional)
# Outputs:   one value per matching line on stdout
#######################################
get_config_value() {
  local keyPattern=$1
  local file=${2:-$configFile}
  grep -E -- "^${keyPattern} ?=" "$file" | sed -E 's/^[^=]*= ?//'
}

#######################################
# Read every ALFA setting from $configFile into global shell variables.
# Settings that only make sense for a given choice (annotation source, plot
# format, requested outputs) are read only when that choice is active.
# Globals:   configFile (read); all variables assigned below (written)
#######################################
load_alfa_config() {
  #_INPUT1
  annotationSource=$(get_config_value 'annotationSource')
  case "$annotationSource" in
    personal_gtf)
      annotationFile=$(get_config_value 'annotationFile')
      ;;
    built_in_index)
      built_in_index_prefix=$(get_config_value 'built_in_index_prefix')
      ;;
    *)
      # Any other source is handled as a user-supplied pair of index files.
      strandedIndex=$(get_config_value 'strandedIndex')
      unstrandedIndex=$(get_config_value 'unstrandedIndex')
      ;;
  esac

  #_INPUT2
  readsType=$(get_config_value 'readsType')
  # One line per readsFile[N] / readsLabel[N] entry, in file order.
  readsFileList=$(get_config_value 'readsFile\[[0-9]+\]')
  readsLabelList=$(get_config_value 'readsLabel\[[0-9]+\]')

  #_OUTPUT CHOICES
  plotChoice=$(get_config_value 'plotChoice')
  countFileChoice=$(get_config_value 'countFileChoice')
  indexChoice=$(get_config_value 'indexChoice')

  #_OUTPUT OPTIONS
  strandness=$(get_config_value 'strandness')
  categoriesDepth=$(get_config_value 'categoriesDepth')
  plotFormat=$(get_config_value 'plotFormat')
  plotThresholdChoice=$(get_config_value 'plotThresholdChoice')
  if [[ "$plotThresholdChoice" == "True" ]]; then
    yMin=$(get_config_value 'yMin')
    yMax=$(get_config_value 'yMax')
  fi

  #_OUTPUT FILES
  if [[ "$plotChoice" == "True" ]]; then
    case "$plotFormat" in
      pdf)
        outputPdf=$(get_config_value 'outputPdf')
        ;;
      svg)
        outputCategoriesSvg=$(get_config_value 'outputCategoriesSvg')
        outputBiotypesSvg=$(get_config_value 'outputBiotypesSvg')
        ;;
      *)
        outputCategoriesPng=$(get_config_value 'outputCategoriesPng')
        outputBiotypesPng=$(get_config_value 'outputBiotypesPng')
        ;;
    esac
  fi
  if [[ "$countFileChoice" == "True" ]]; then
    outputCountFile=$(get_config_value 'outputCountFile')
  fi
  if [[ "$indexChoice" == "True" ]]; then
    outputStrandedIndex=$(get_config_value 'outputStrandedIndex')
    outputUnstrandedIndex=$(get_config_value 'outputUnstrandedIndex')
  fi
}

load_alfa_config

#########################################################################################################
# CREATION OF A TMP DIRECTORY FOR THE OUTPUT FILES OF ALFA AND cd #
#########################################################################################################
# Parent of the working directory. ALFA_TMP_ROOT overrides the historical
# location; when the selected location does not exist, the system temporary
# directory is used instead.
tmpRoot="${ALFA_TMP_ROOT:-/export/home1/users/biocomp/chbernar/galaxy/database/tmp}"
if [[ ! -d "$tmpRoot" ]]; then
  tmpRoot="${TMPDIR:-/tmp}"
fi

# mktemp -d already creates a fresh, uniquely named directory, so it is used
# as is (deleting and recreating it would reopen the race mktemp closes).
outputDirectory=$(mktemp -d "${tmpRoot%/}/tmpXXXXXX") || {
  printf 'Unable to create a temporary directory in %s\n' "$tmpRoot" >&2
  exit 1
}
# Same permissions as before: read/write/execute for user, group and others.
chmod -R ugo+wrx -- "$outputDirectory"
# Every relative path used from here on lives in this directory; without
# this check a failed cd would leave the script running somewhere else.
cd -- "$outputDirectory" || {
  printf 'Unable to enter the temporary directory %s\n' "$outputDirectory" >&2
  exit 1
}

#########################################################################################################
# TEST OF INPUT1 #
#########################################################################################################

#######################################
# Copy the user-supplied index files into the current directory under the
# names "index.stranded.index" and "index.unstranded.index", because ALFA
# requires the extension ".(un)stranded.index". Does nothing unless
# annotationSource is "index".
# Globals:   annotationSource, strandedIndex, unstrandedIndex (read)
#            index (written: common prefix of the two copies)
# Returns:   0 on success or when nothing had to be done, 1 if a copy failed
#######################################
stage_index_files() {
  if [[ "$annotationSource" != "index" ]]; then
    return 0
  fi
  index="index"
  cp -- "$strandedIndex" "${index}.stranded.index" || return 1
  cp -- "$unstrandedIndex" "${index}.unstranded.index" || return 1
}

stage_index_files || exit 1

#########################################################################################################
# TEST OF INPUT2 AND DETERMINATION OF PYTHON READS INPUT ARGUMENT #
#########################################################################################################

#######################################
# Turn the newline-separated lists of reads files and labels into the
# 1-indexed arrays readsFile / readsLabel and into the "file label ..."
# string handed to ALFA.py.
# Globals:   readsFileList, readsLabelList, readsType (read)
#            readsListLen, readsFile, readsLabel, readsInput (written)
# Outputs:   for non-bam reads, copies each file to tmpBedgraph_<i>.bedgraph
#            in the current directory
#######################################
prepare_reads_input() {
  local i line unsortedHits bedgraphFile
  local -a labelLines=()

  readsFile=()
  readsLabel=()
  readsInput=""

  # An empty list holds no file at all (counting lines with wc would
  # report one).
  readsListLen=0
  if [[ -n "$readsFileList" ]]; then
    while IFS= read -r line; do
      readsListLen=$(( readsListLen + 1 ))
      readsFile[$readsListLen]=$line
    done <<< "$readsFileList"
  fi
  while IFS= read -r line; do
    labelLines+=("$line")
  done <<< "$readsLabelList"

  for (( i = 1; i <= readsListLen; i++ )); do
    # Spaces in labels are replaced with underscores.
    readsLabel[$i]=${labelLines[i-1]// /_}
    if [[ "$readsType" == "bam" ]]; then
      # Number of header lines declaring the BAM file as unsorted.
      unsortedHits=$(samtools view -H "${readsFile[$i]}" | grep -c 'SO:unsorted')
      if [[ "$unsortedHits" != "0" ]]; then
        # NOTE(review): this is the legacy "samtools sort <in> <out-prefix>"
        # call; readsFile[i] is not updated afterwards, so ALFA presumably
        # still receives the unsorted file - confirm the intended behaviour.
        samtools sort "${readsFile[$i]}" "${readsFile[$i]}"
      fi
    else
      #need to copy the file.dat to tmp.bedgraph because ALFA requires the extension ".bedgraph"
      bedgraphFile="tmpBedgraph_$i"
      cp -- "${readsFile[$i]}" "${bedgraphFile}.bedgraph"
      readsFile[$i]=$bedgraphFile
    fi
    if [[ -z "${readsLabel[$i]}" ]]; then
      readsLabel[$i]="sample_$i"
    fi
    readsInput="$readsInput ${readsFile[$i]} ${readsLabel[$i]}"
  done
}

prepare_reads_input

#########################################################################################################
# DETERMINATION OF THE APPROPRIATE SCRIPTS ARGUMENTS #
#########################################################################################################

#######################################
# Assemble the argument strings later passed to ALFA.py.
# Globals (read):    annotationSource, index, built_in_index_prefix,
#                    annotationFile, readsInput, readsType, strandness,
#                    categoriesDepth, plotChoice, plotFormat,
#                    plotThresholdChoice, yMin, yMax, ALFA_SCRIPT_PATH
# Globals (written): scriptPath, scriptInput, scriptStrandness,
#                    scriptCategoriesDepth, scriptPlotOutput
#######################################
build_alfa_arguments() {
  # Directory holding ALFA.py; ALFA_SCRIPT_PATH overrides the historical
  # location. The value always ends with "/" because it is concatenated
  # directly with "ALFA.py".
  scriptPath="${ALFA_SCRIPT_PATH:-/export/home1/users/biocomp/chbernar/galaxy/tools/alfa/}"
  scriptPath="${scriptPath%/}/"

  case "$annotationSource" in
    index)
      scriptInput="-g $index -i $readsInput"
      ;;
    built_in_index)
      scriptInput="-g $built_in_index_prefix -i $readsInput"
      ;;
    *)
      scriptInput="-a $annotationFile -i $readsInput"
      ;;
  esac
  if [[ "$readsType" == "bedgraph" ]]; then
    scriptInput="$scriptInput --bedgraph"
  fi

  scriptStrandness="-s $strandness"
  scriptCategoriesDepth="-d $categoriesDepth"

  if [[ "$plotChoice" == "True" ]]; then
    # Anything that is neither pdf nor svg is treated as png, the same
    # default the configuration parsing uses when reading the output paths.
    case "$plotFormat" in
      pdf) scriptPlotOutput="--pdf plotFile.pdf" ;;
      svg) scriptPlotOutput="--svg plotFile" ;;
      *) scriptPlotOutput="--png plotFile" ;;
    esac
    if [[ "$plotThresholdChoice" == "True" ]]; then
      scriptPlotOutput="$scriptPlotOutput -t $yMin $yMax"
    fi
  else
    scriptPlotOutput="--n"
  fi
}

build_alfa_arguments

#########################################################################################################
# DISPLAY ALFA PROCESS #
#########################################################################################################

#######################################
# Append the "ALFA PROCESS" section to the log report and run ALFA.py.
# ALFA's stdout is appended to the report; its stderr is written to the
# file "errorFile" in the current directory.
# Globals:   logReport, outputDirectory, scriptPath, scriptInput,
#            scriptStrandness, scriptCategoriesDepth, scriptPlotOutput,
#            plotChoice, countFileChoice, indexChoice (read)
# Exits:     with status 0 and a message on stderr when no output at all
#            was requested
#######################################
run_alfa() {
  local -a alfaCommand

  {
    printf "__________________________________________________________________\n\n"
    printf " ALFA PROCESS \n"
    printf "__________________________________________________________________\n"
  } >> "$logReport"

  if [[ "$plotChoice" == "False" && "$countFileChoice" == "False" && "$indexChoice" == "False" ]]; then
    cat <<error 1>&2

No output to return.
Process Aborted
error
    # NOTE(review): the abort is reported on stderr but with exit status 0;
    # kept as is - confirm that the caller relies on stderr for this case.
    exit 0
  fi

  # The argument strings hold several space-separated words each; they are
  # left unquoted on purpose so that every word becomes its own argument.
  # Building the command once guarantees the logged command line is the one
  # that runs.
  alfaCommand=(python "${scriptPath}ALFA.py" $scriptInput $scriptStrandness $scriptCategoriesDepth $scriptPlotOutput)

  {
    printf "Command:\n"
    printf '%s\n' "${alfaCommand[*]}"
    printf "\n******************************************************************\n"
    printf "Temporary Output Directory:\n"
    printf '%s\n' "$outputDirectory"
    printf "\n******************************************************************\n"
    printf "ALFA prompt:\n"
  } >> "$logReport"
  "${alfaCommand[@]}" >> "$logReport" 2> errorFile
  printf "\n******************************************************************\n" >> "$logReport"
}

run_alfa

#########################################################################################################
# REDIRECTION OF ERRORS - TMP SOURCE ALFA.PY MUST BE CORRECTED SOON #
#########################################################################################################

#######################################
# Classify what ALFA.py wrote to stderr (file "errorFile" in the current
# directory) and report it.
#   - errorFile empty or missing: nothing to report.
#   - errorFile contains '### End of program': ALFA printed its end marker on
#     stderr although the run worked (seen with the --n option); ignored so
#     that the tool does not crash in this case.
#   - the log report contains '### End of program': the stderr content is
#     reported as a warning.
#   - otherwise: the stderr content is reported as an error.
# Globals:   logReport (read, appended to)
# Outputs:   the message on stderr and in the log report
# Returns:   2 for an error, 0 in every other case
#######################################
check_alfa_stderr() {
  local severity status

  [[ -s errorFile ]] || return 0
  if grep -q -- '### End of program' errorFile; then
    return 0
  fi

  if grep -q -- '### End of program' "$logReport"; then
    severity="warning"
    status=0
  else
    severity="error"
    status=2
  fi

  {
    printf "The script ALFA.py encountered the following %s:\n\n" "$severity"
    cat errorFile
  } >&2
  {
    printf "ALFA %s:\n" "$severity"
    cat errorFile
    printf "\n******************************************************************\n"
  } >> "$logReport"
  return "$status"
}

# True errors make the script exit with code "2".
check_alfa_stderr || exit 2

#########################################################################################################
# OUTPUT REDIRECTIONS #
#########################################################################################################

#######################################
# Hand the files produced in the current directory over to the output paths
# read from the configuration.
# Globals:   plotChoice, plotFormat, countFileChoice, indexChoice,
#            annotationSource, readsListLen, readsLabel, annotationFile,
#            strandedIndex, unstrandedIndex, built_in_index_prefix and the
#            output* destination paths (read); annotationFileName (written)
#######################################
collect_alfa_outputs() {
  local i

  if [[ "$plotChoice" == "True" ]]; then
    # Anything that is neither pdf nor svg is treated as png, the same
    # default the configuration parsing uses when reading the output paths.
    case "$plotFormat" in
      pdf)
        mv -- "plotFile.pdf" "$outputPdf"
        ;;
      svg)
        mv -- "plotFile.categories.svg" "$outputCategoriesSvg"
        mv -- "plotFile.biotypes.svg" "$outputBiotypesSvg"
        ;;
      *)
        mv -- "plotFile.categories.png" "$outputCategoriesPng"
        mv -- "plotFile.biotypes.png" "$outputBiotypesPng"
        ;;
    esac
  fi

  if [[ "$countFileChoice" == "True" ]]; then
    # Concatenate the per-sample "<label>.categories_counts" files into one
    # report, each preceded by its label.
    : > countFile
    for (( i = 1; i <= readsListLen; i++ )); do
      {
        # The label is data, not a format string: a "%" in it must be
        # printed literally.
        printf '##LABEL: %s\n\n' "${readsLabel[$i]}"
        cat -- "${readsLabel[$i]}.categories_counts"
        printf "__________________________________________________________________\n"
      } >> countFile
    done
    mv -- countFile "$outputCountFile"
  fi

  if [[ "$indexChoice" == "True" ]]; then
    case "$annotationSource" in
      index)
        # The index files are inputs of the run: they are copied, not moved,
        # so the input files stay in place.
        cp -- "$strandedIndex" "$outputStrandedIndex"
        cp -- "$unstrandedIndex" "$outputUnstrandedIndex"
        ;;
      built_in_index)
        cp -- "${built_in_index_prefix}.stranded.index" "$outputStrandedIndex"
        cp -- "${built_in_index_prefix}.unstranded.index" "$outputUnstrandedIndex"
        ;;
      *)
        # File name (without directories) of the annotation file; only
        # names ending in ".dat" are recognised.
        annotationFileName=$(grep -E -o '[^/]*\.dat$' <<< "$annotationFile")
        mv -- "${annotationFileName}.stranded.index" "$outputStrandedIndex"
        mv -- "${annotationFileName}.unstranded.index" "$outputUnstrandedIndex"
        ;;
    esac
  fi
}

collect_alfa_outputs