diff bin/batch_para_cov10p_fit.sh @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/batch_para_cov10p_fit.sh	Thu Sep 07 15:06:58 2017 -0400
@@ -0,0 +1,101 @@
+#!/bin/bash
+ReadFile1Name=`basename $1`
+ReadFile2Name=`basename $2`
+outputname=$3
+readsize=$4
+DatabasePrefix=$5
+Outputfolder=$6
+SrcFolder=$7
+noIRM=$8
+
+cd $Outputfolder
+if [ $ReadFile1Name != $ReadFile2Name ];then
+	echo "ESTIMATE: Getting fragment size information from data..."
+	perl $SrcFolder/get.frag.size.pl $ReadFile1Name.nomt $ReadFile2Name.nomt $readsize
+	perl $SrcFolder/get.hist.pl $ReadFile1Name.nomt.fragsize -w=1 -c=1
+else
+	echo "ESTIMATE: Generating the other half of reads..."
+	readnum=`wc -l $ReadFile1Name.nomt |cut -f1 -d" "`
+	for (( i=0; i<$readnum; i++ ))
+	do
+		echo "NM" >>$ReadFile1Name.f.nomt
+	done
+	echo "#Width:1" >$ReadFile1Name.nomt.fragsize.hist
+fi
+echo "ESTIMATE: Creating cache folder.."
+if [ $ReadFile1Name != $ReadFile2Name ];then
+	ReadFile2FinalName=$ReadFile2Name.nomt
+else
+	ReadFile2FinalName=$ReadFile1Name.f.nomt
+fi
+
+mkdir $ReadFile1Name.result
+cd $ReadFile1Name.result
+ln -s ../$ReadFile1Name.nomt ./
+ln -s ../$ReadFile2FinalName ./
+ln -s ../$ReadFile1Name.nomt.fragsize.hist ./
+echo "ESTIMATE: Split mapping results via chromosomes..."
+perl $SrcFolder/scan_nomt.pl $ReadFile1Name.nomt $ReadFile2FinalName
+loopi=0
+echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
+while read chrlist
+do
+	chr=`echo $chrlist |tr -d "\n"`
+	for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
+	do
+	base=`basename $dbfile`
+	echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -s $readsize" >>r$loopi.sh
+	done
+done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
+
+echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
+perl $SrcFolder/batchqsub.pl r$loopi.sh
+echo "ESTIMATE: Loop $loopi done..."
+
+cat $ReadFile1Name.$loopi.*.ratio >$outputname.$loopi.ratio
+cat $ReadFile1Name.$loopi.*.log >$outputname.$loopi.log
+cat $ReadFile1Name.$loopi.*.nums >$outputname.$loopi.nums
+rm  $ReadFile1Name.$loopi.*.ratio
+rm $ReadFile1Name.$loopi.*.log
+rm $ReadFile1Name.$loopi.*.nums
+
+
+if [ "$noIRM" ];then
+	echo "ESTIMATE: No IRM correction, skipped..."
+	mv $outputname.$loopi.ratio $outputname.ratio
+	mv $outputname.$loopi.log $outputname.log
+	mv $outputname.$loopi.nums $outputname.nums
+else
+
+	echo "ESTIMATE: derive IRMs from data..."
+	awk '{if ($15>=10) printf $0"\n"}' $outputname.$loopi.ratio >$outputname.mle
+	perl $SrcFolder/get_event_dist_fit.pl $outputname.mle -c=2 -w=0.001
+
+	loopi=1
+	echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
+	while read chrlist
+	do
+		chr=`echo $chrlist |tr -d "\n"`
+		for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
+		do
+		base=`basename $dbfile`
+		echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -b $outputname.mle.fit.hist -s $readsize" >>r$loopi.sh
+		done
+	done  <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
+	echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
+
+#perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname
+	perl $SrcFolder/batchqsub.pl r$loopi.sh
+	echo "ESTIMATE: Loop $loopi done..."
+	cat $ReadFile1Name.$loopi.*.ratio >$outputname.ratio
+	cat $ReadFile1Name.$loopi.*.log >$outputname.log
+	cat $ReadFile1Name.$loopi.*.nums >$outputname.nums
+	rm  $ReadFile1Name.$loopi.*.ratio
+	rm $ReadFile1Name.$loopi.*.log
+	rm $ReadFile1Name.$loopi.*.nums
+fi
+
+mv $outputname.ratio $outputname.log $outputname.nums ../
+cd ../
+rm $ReadFile1Name.result -rf
+rm $ReadFile1Name.nomt $ReadFile2FinalName