1
|
1 #!/bin/bash
|
|
2 ReadFile1Name=`basename $1`
|
|
3 ReadFile2Name=`basename $2`
|
|
4 outputname=$3
|
|
5 readsize=$4
|
|
6 DatabasePrefix=$5
|
|
7 Outputfolder=$6
|
|
8 SrcFolder=$7
|
|
9 noIRM=$8
|
|
10
|
|
11 cd $Outputfolder
|
|
12 if [ $ReadFile1Name != $ReadFile2Name ];then
|
|
13 echo "ESTIMATE: Getting fragment size information from data..."
|
|
14 perl $SrcFolder/get.frag.size.pl $ReadFile1Name.nomt $ReadFile2Name.nomt $readsize
|
|
15 perl $SrcFolder/get.hist.pl $ReadFile1Name.nomt.fragsize -w=1 -c=1
|
|
16 else
|
|
17 echo "ESTIMATE: Generating the other half of reads..."
|
|
18 readnum=`wc -l $ReadFile1Name.nomt |cut -f1 -d" "`
|
|
19 for (( i=0; i<$readnum; i++ ))
|
|
20 do
|
|
21 echo "NM" >>$ReadFile1Name.f.nomt
|
|
22 done
|
|
23 echo "#Width:1" >$ReadFile1Name.nomt.fragsize.hist
|
|
24 fi
|
|
25 echo "ESTIMATE: Creating cache folder.."
|
|
26 if [ $ReadFile1Name != $ReadFile2Name ];then
|
|
27 ReadFile2FinalName=$ReadFile2Name.nomt
|
|
28 else
|
|
29 ReadFile2FinalName=$ReadFile1Name.f.nomt
|
|
30 fi
|
|
31
|
|
32 mkdir $ReadFile1Name.result
|
|
33 cd $ReadFile1Name.result
|
|
34 ln -s ../$ReadFile1Name.nomt ./
|
|
35 ln -s ../$ReadFile2FinalName ./
|
|
36 ln -s ../$ReadFile1Name.nomt.fragsize.hist ./
|
|
37 echo "ESTIMATE: Split mapping results via chromosomes..."
|
|
38 perl $SrcFolder/scan_nomt.pl $ReadFile1Name.nomt $ReadFile2FinalName
|
|
39 loopi=0
|
|
40 echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
|
|
41 while read chrlist
|
|
42 do
|
|
43 chr=`echo $chrlist |tr -d "\n"`
|
|
44 for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
|
|
45 do
|
|
46 base=`basename $dbfile`
|
|
47 echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -s $readsize" >>r$loopi.sh
|
|
48 done
|
|
49 done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
|
|
50
|
|
51 echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
|
|
52 perl $SrcFolder/batchqsub.pl r$loopi.sh
|
|
53 echo "ESTIMATE: Loop $loopi done..."
|
|
54
|
|
55 cat $ReadFile1Name.$loopi.*.ratio >$outputname.$loopi.ratio
|
|
56 cat $ReadFile1Name.$loopi.*.log >$outputname.$loopi.log
|
|
57 cat $ReadFile1Name.$loopi.*.nums >$outputname.$loopi.nums
|
|
58 rm $ReadFile1Name.$loopi.*.ratio
|
|
59 rm $ReadFile1Name.$loopi.*.log
|
|
60 rm $ReadFile1Name.$loopi.*.nums
|
|
61
|
|
62
|
|
63 if [ "$noIRM" ];then
|
|
64 echo "ESTIMATE: No IRM correction, skipped..."
|
|
65 mv $outputname.$loopi.ratio $outputname.ratio
|
|
66 mv $outputname.$loopi.log $outputname.log
|
|
67 mv $outputname.$loopi.nums $outputname.nums
|
|
68 else
|
|
69
|
|
70 echo "ESTIMATE: derive IRMs from data..."
|
|
71 awk '{if ($15>=10) printf $0"\n"}' $outputname.$loopi.ratio >$outputname.mle
|
|
72 perl $SrcFolder/get_event_dist_fit.pl $outputname.mle -c=2 -w=0.001
|
|
73
|
|
74 loopi=1
|
|
75 echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
|
|
76 while read chrlist
|
|
77 do
|
|
78 chr=`echo $chrlist |tr -d "\n"`
|
|
79 for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
|
|
80 do
|
|
81 base=`basename $dbfile`
|
|
82 echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -b $outputname.mle.fit.hist -s $readsize" >>r$loopi.sh
|
|
83 done
|
|
84 done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
|
|
85 echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
|
|
86
|
|
87 #perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname
|
|
88 perl $SrcFolder/batchqsub.pl r$loopi.sh
|
|
89 echo "ESTIMATE: Loop $loopi done..."
|
|
90 cat $ReadFile1Name.$loopi.*.ratio >$outputname.ratio
|
|
91 cat $ReadFile1Name.$loopi.*.log >$outputname.log
|
|
92 cat $ReadFile1Name.$loopi.*.nums >$outputname.nums
|
|
93 rm $ReadFile1Name.$loopi.*.ratio
|
|
94 rm $ReadFile1Name.$loopi.*.log
|
|
95 rm $ReadFile1Name.$loopi.*.nums
|
|
96 fi
|
|
97
|
|
98 mv $outputname.ratio $outputname.log $outputname.nums ../
|
|
99 cd ../
|
|
100 rm $ReadFile1Name.result -rf
|
|
101 rm $ReadFile1Name.nomt $ReadFile2FinalName
|