view bin/batch_para_cov10p_fit.sh @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
line wrap: on
line source

#!/bin/bash
ReadFile1Name=`basename $1`
ReadFile2Name=`basename $2`
outputname=$3
readsize=$4
DatabasePrefix=$5
Outputfolder=$6
SrcFolder=$7
noIRM=$8

cd $Outputfolder
if [ $ReadFile1Name != $ReadFile2Name ];then
	echo "ESTIMATE: Getting fragment size information from data..."
	perl $SrcFolder/get.frag.size.pl $ReadFile1Name.nomt $ReadFile2Name.nomt $readsize
	perl $SrcFolder/get.hist.pl $ReadFile1Name.nomt.fragsize -w=1 -c=1
else
	echo "ESTIMATE: Generating the other half of reads..."
	readnum=`wc -l $ReadFile1Name.nomt |cut -f1 -d" "`
	for (( i=0; i<$readnum; i++ ))
	do
		echo "NM" >>$ReadFile1Name.f.nomt
	done
	echo "#Width:1" >$ReadFile1Name.nomt.fragsize.hist
fi
echo "ESTIMATE: Creating cache folder.."
if [ $ReadFile1Name != $ReadFile2Name ];then
	ReadFile2FinalName=$ReadFile2Name.nomt
else
	ReadFile2FinalName=$ReadFile1Name.f.nomt
fi

mkdir $ReadFile1Name.result
cd $ReadFile1Name.result
ln -s ../$ReadFile1Name.nomt ./
ln -s ../$ReadFile2FinalName ./
ln -s ../$ReadFile1Name.nomt.fragsize.hist ./
echo "ESTIMATE: Split mapping results via chromosomes..."
perl $SrcFolder/scan_nomt.pl $ReadFile1Name.nomt $ReadFile2FinalName
loopi=0
echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
while read chrlist
do
	chr=`echo $chrlist |tr -d "\n"`
	for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
	do
	base=`basename $dbfile`
	echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -s $readsize" >>r$loopi.sh
	done
done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list

echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
perl $SrcFolder/batchqsub.pl r$loopi.sh
echo "ESTIMATE: Loop $loopi done..."

cat $ReadFile1Name.$loopi.*.ratio >$outputname.$loopi.ratio
cat $ReadFile1Name.$loopi.*.log >$outputname.$loopi.log
cat $ReadFile1Name.$loopi.*.nums >$outputname.$loopi.nums
rm  $ReadFile1Name.$loopi.*.ratio
rm $ReadFile1Name.$loopi.*.log
rm $ReadFile1Name.$loopi.*.nums


if [ "$noIRM" ];then
	echo "ESTIMATE: No IRM correction, skipped..."
	mv $outputname.$loopi.ratio $outputname.ratio
	mv $outputname.$loopi.log $outputname.log
	mv $outputname.$loopi.nums $outputname.nums
else

	echo "ESTIMATE: derive IRMs from data..."
	awk '{if ($15>=10) printf $0"\n"}' $outputname.$loopi.ratio >$outputname.mle
	perl $SrcFolder/get_event_dist_fit.pl $outputname.mle -c=2 -w=0.001

	loopi=1
	echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
	while read chrlist
	do
		chr=`echo $chrlist |tr -d "\n"`
		for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
		do
		base=`basename $dbfile`
		echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -b $outputname.mle.fit.hist -s $readsize" >>r$loopi.sh
		done
	done  <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
	echo "ESTIMATE: Submit shell scripts for Loop $loopi..."

#perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname
	perl $SrcFolder/batchqsub.pl r$loopi.sh
	echo "ESTIMATE: Loop $loopi done..."
	cat $ReadFile1Name.$loopi.*.ratio >$outputname.ratio
	cat $ReadFile1Name.$loopi.*.log >$outputname.log
	cat $ReadFile1Name.$loopi.*.nums >$outputname.nums
	rm  $ReadFile1Name.$loopi.*.ratio
	rm $ReadFile1Name.$loopi.*.log
	rm $ReadFile1Name.$loopi.*.nums
fi

mv $outputname.ratio $outputname.log $outputname.nums ../
cd ../
rm $ReadFile1Name.result -rf
rm $ReadFile1Name.nomt $ReadFile2FinalName