diff ALFA/alfa_wrapper.sh @ 18:a1e2ab10b317 draft

Uploaded
author charles-bernard
date Tue, 11 Oct 2016 09:18:48 -0400
parents
children 1239f24962d8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ALFA/alfa_wrapper.sh	Tue Oct 11 09:18:48 2016 -0400
@@ -0,0 +1,233 @@
+#!/usr/bin/env bash
+
+#########################################################################################################
+# ARGUMENTS FROM alfa_wrapper.xml                                                                       #
+#########################################################################################################
+configFile=$1;   # $1: key=value configuration file (cleaned in place just below)
+logReport=$2;    # $2: log report file (overwritten here, appended to afterwards)
+sed -i -e '/^$/d; s/\t//g;' "$configFile";   # delete empty lines and strip every tab
+printf "__________________________________________________________________\n\n" > "$logReport"
+printf "                          ALFA CONFIG                             \n" >> "$logReport"
+printf "__________________________________________________________________\n" >> "$logReport"
+cat "$configFile" >> "$logReport"
+
+#########################################################################################################
+# INITIALIZATION OF THE VARIABLES from $configFile                                                      #
+#########################################################################################################
+#_INPUT1: annotation source; only the keys that belong to the selected source are read
+annotationSource=$(grep -P '^annotationSource ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+if [[ "$annotationSource" == "personal_gtf" ]]; then
+	annotationFile=$(grep -P '^annotationFile ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+elif [[ "$annotationSource" == "built_in_index" ]]; then
+	built_in_index_prefix=$(grep -P '^built_in_index_prefix ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+else
+	strandedIndex=$(grep -P '^strandedIndex ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+	unstrandedIndex=$(grep -P '^unstrandedIndex ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+fi
+
+#_INPUT2: readsFile[N] / readsLabel[N] entries are collected as newline-separated lists
+readsType=$(grep -P '^readsType ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+readsFileList=$(grep -P '^readsFile\[[0-9]+\] ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+readsLabelList=$(grep -P '^readsLabel\[[0-9]+\] ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+
+#_OUTPUT CHOICES: compared against the literal strings "True" / "False" further down
+plotChoice=$(grep -P '^plotChoice ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+countFileChoice=$(grep -P '^countFileChoice ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+indexChoice=$(grep -P '^indexChoice ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+
+#_OUTPUT OPTIONS: yMin / yMax are only read when a plot threshold was requested
+strandness=$(grep -P '^strandness ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+categoriesDepth=$(grep -P '^categoriesDepth ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+plotFormat=$(grep -P '^plotFormat ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+plotThresholdChoice=$(grep -P '^plotThresholdChoice ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+if [[ "$plotThresholdChoice" == "True" ]]; then
+	yMin=$(grep -P '^yMin ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+	yMax=$(grep -P '^yMax ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+fi
+
+#_OUTPUT FILES: destination paths, read only for the outputs that were requested
+if [[ "$plotChoice" == "True" ]]; then
+	if [[ "$plotFormat" == "pdf" ]]; then
+		outputPdf=$(grep -P '^outputPdf ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+	elif [[ "$plotFormat" == "svg" ]]; then
+		outputCategoriesSvg=$(grep -P '^outputCategoriesSvg ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+		outputBiotypesSvg=$(grep -P '^outputBiotypesSvg ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+	else
+		outputCategoriesPng=$(grep -P '^outputCategoriesPng ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+		outputBiotypesPng=$(grep -P '^outputBiotypesPng ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+	fi
+fi
+if [[ "$countFileChoice" == "True" ]]; then
+	outputCountFile=$(grep -P '^outputCountFile ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+fi
+if [[ "$indexChoice" == "True" ]]; then
+	outputStrandedIndex=$(grep -P '^outputStrandedIndex ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+	outputUnstrandedIndex=$(grep -P '^outputUnstrandedIndex ?=' "$configFile" | awk 'BEGIN{FS="= ?"} {print $2}')
+fi
+
+#########################################################################################################
+# CREATION OF A TMP DIRECTORY FOR THE OUTPUT FILES OF ALFA AND cd                                       #
+#########################################################################################################
+outputDirectory=`mktemp -d /export/home1/users/biocomp/chbernar/galaxy/database/tmp/tmpXXXXXX`;
+if [ -d $outputDirectory ]; then
+	chmod -R ugo+wrx $outputDirectory;
+	rm -R $outputDirectory;
+fi
+mkdir $outputDirectory;
+chmod -R ugo+wrx $outputDirectory;
+cd $outputDirectory;
+
+#########################################################################################################
+# TEST OF INPUT1                                                                                        #
+#########################################################################################################
+if [[ "$annotationSource" == "index" ]]; then
+	# ALFA requires the extension ".(un)stranded.index": the provided files are copied under that name
+	index="index"   # common prefix of the two copies created in the current directory
+	cp "$strandedIndex" "$index.stranded.index"
+	cp "$unstrandedIndex" "$index.unstranded.index"
+fi
+
+#########################################################################################################
+# TEST OF INPUT2 AND DETERMINATION OF PYTHON READS INPUT ARGUMENT                                       #
+#########################################################################################################
+readsListLen=$(echo "$readsFileList" | wc -l)   # the list holds one reads file per line
+readsInput=""
+for (( i = 1; i <= readsListLen; i++ )); do
+	readsFile[$i]=$(echo "$readsFileList" | awk -v i="$i" 'NR==i')
+	readsLabel[$i]=$(echo "$readsLabelList" | awk -v i="$i" 'NR==i' | sed -e 's/ /_/g')   # spaces become "_"
+	if [[ "$readsType" == "bam" ]]; then
+		bamSorted=$(samtools view -H "${readsFile[$i]}" | grep -c 'SO:unsorted')
+		if [[ "$bamSorted" != "0" ]]; then   # the BAM header declares the file as unsorted
+			samtools sort "${readsFile[$i]}" "${readsFile[$i]}"   # NOTE(review): readsFile[i] is not updated afterwards - confirm where the sorted output lands
+		fi
+	else
+		# ALFA requires the extension ".bedgraph": copy the file and pass the extension-less prefix on
+		bedgraphFile="tmpBedgraph_$i"
+		cp "${readsFile[$i]}" "$bedgraphFile.bedgraph"
+		readsFile[$i]=$bedgraphFile
+	fi
+	if [[ -z "${readsLabel[$i]}" ]]; then
+		readsLabel[$i]="sample_$i"   # default label when none was provided
+	fi
+	readsInput="$readsInput ${readsFile[$i]} ${readsLabel[$i]}"   # "<file> <label>" pairs, space separated
+done
+
+#########################################################################################################
+# DETERMINATION OF THE APPROPRIATE SCRIPTS ARGUMENTS                                                    #
+#########################################################################################################
+scriptPath="/export/home1/users/biocomp/chbernar/galaxy/tools/alfa/";
+if [ "$annotationSource" == "index" ]; then
+	scriptInput="-g $index -i ""$readsInput";
+elif [ "$annotationSource" == "built_in_index" ]; then
+	scriptInput="-g $built_in_index_prefix -i ""$readsInput";
+else
+	scriptInput="-a $annotationFile -i ""$readsInput";
+fi
+if [ "$readsType" = "bedgraph" ]; then
+	scriptInput=$scriptInput" --bedgraph";
+fi
+scriptStrandness="-s "$strandness
+scriptCategoriesDepth="-d "$categoriesDepth
+if [ "$plotChoice" == "True" ]; then
+	if [ "$plotFormat" == "pdf" ]; then
+		scriptPlotOutput="--pdf plotFile.pdf";
+	else
+		scriptPlotOutput="--"$plotFormat" plotFile";
+	fi
+	if [ "$plotThresholdChoice" == "True" ]; then
+		scriptPlotOutput=$scriptPlotOutput" -t ""$yMin"" ""$yMax"
+	fi
+else
+	scriptPlotOutput="--n";
+fi
+
+#########################################################################################################
+# DISPLAY ALFA PROCESS                                                                                  #
+#########################################################################################################
+printf "__________________________________________________________________\n\n" >> "$logReport"
+printf "                          ALFA PROCESS                            \n" >> "$logReport"
+printf "__________________________________________________________________\n" >> "$logReport"
+
+if [[ "$plotChoice" == "False" && "$countFileChoice" == "False" && "$indexChoice" == "False" ]]; then
+cat <<error 1>&2
+
+No output to return. 
+Process Aborted
+error
+exit 0   # NOTE(review): the abort is reported on stderr only, the exit status stays 0 - confirm this is intended
+fi
+# The script* variables each hold several space-separated arguments: they are left unquoted on purpose
+printf "Command:\n" >> "$logReport"
+echo "python ${scriptPath}ALFA.py" $scriptInput $scriptStrandness $scriptCategoriesDepth $scriptPlotOutput >> "$logReport"
+printf "\n******************************************************************\n" >> "$logReport"
+printf "Temporary Output Directory:\n" >> "$logReport"
+echo "$outputDirectory" >> "$logReport"
+printf "\n******************************************************************\n" >> "$logReport"
+printf "ALFA prompt:\n" >> "$logReport"
+python "$scriptPath"ALFA.py $scriptInput $scriptStrandness $scriptCategoriesDepth $scriptPlotOutput >> "$logReport" 2>errorFile   # stdout -> log, stderr -> errorFile
+printf "\n******************************************************************\n" >> "$logReport"
+
+#########################################################################################################
+# REDIRECTION OF ERRORS - TMP SOURCE ALFA.PY MUST BE CORRECTED SOON                                     #
+#########################################################################################################
+if [[ -s errorFile ]]; then
+	# With the --n option ALFA prints its end-of-program marker on stderr even when the run succeeded:
+	# nothing is reported in that case. NOTE(review): confirm the marker text matches what ALFA.py prints.
+	endProgram=$(grep -c '### End of program' errorFile)
+	if [[ "$endProgram" == "0" ]]; then
+		# Marker found in the log report (ALFA's stdout): the stderr content is reported as a warning.
+		# Marker found nowhere: the stderr content is a true error and the script exits with code 2.
+		endProgram=$(grep -c '### End of program' "$logReport")
+		if [[ "$endProgram" == "0" ]]; then
+			>&2 printf "The script ALFA.py encountered the following error:\n\n"
+			>&2 cat errorFile
+			printf "ALFA error:\n" >> "$logReport"
+			cat errorFile >> "$logReport"
+			printf "\n******************************************************************\n" >> "$logReport"
+			exit 2
+		else
+			>&2 printf "The script ALFA.py encountered the following warning:\n\n"
+			>&2 cat errorFile
+			printf "ALFA warning:\n" >> "$logReport"
+			cat errorFile >> "$logReport"
+			printf "\n******************************************************************\n" >> "$logReport"
+		fi
+	fi
+fi
+
+#########################################################################################################
+# OUTPUT REDIRECTIONS                                                                                   #
+#########################################################################################################
+if [[ "$plotChoice" == "True" ]]; then
+	if [[ "$plotFormat" == "pdf" ]]; then
+		mv "plotFile.pdf" "$outputPdf"
+	elif [[ "$plotFormat" == "png" ]]; then
+		mv "plotFile.categories.png" "$outputCategoriesPng"
+		mv "plotFile.biotypes.png" "$outputBiotypesPng"
+	else
+		mv "plotFile.categories.svg" "$outputCategoriesSvg"
+		mv "plotFile.biotypes.svg" "$outputBiotypesSvg"
+	fi
+fi
+if [[ "$countFileChoice" == "True" ]]; then
+	: > countFile   # start from an empty file, then append one section per sample
+	for (( i = 1; i <= readsListLen; i++ )); do
+		printf '##LABEL: %s\n\n' "${readsLabel[$i]}" >> countFile   # the label is data, never part of the format
+		cat "${readsLabel[$i]}.categories_counts" >> countFile
+		printf "__________________________________________________________________\n" >> countFile
+	done
+	mv countFile "$outputCountFile"
+fi
+if [[ "$indexChoice" == "True" ]]; then
+	if [[ "$annotationSource" == "index" ]]; then
+		cp "$strandedIndex" "$outputStrandedIndex"   # copied, not moved: these are the input index files
+		cp "$unstrandedIndex" "$outputUnstrandedIndex"
+	elif [[ "$annotationSource" == "built_in_index" ]]; then
+		cp "$built_in_index_prefix.stranded.index" "$outputStrandedIndex"
+		cp "$built_in_index_prefix.unstranded.index" "$outputUnstrandedIndex"
+	else
+		annotationFileName=$(grep -P -o '[^/]*\.dat$' <<< "$annotationFile")   # last path component, when it ends in .dat
+		mv "$annotationFileName.stranded.index" "$outputStrandedIndex"
+		mv "$annotationFileName.unstranded.index" "$outputUnstrandedIndex"
+	fi
+fi
\ No newline at end of file