annotate bin/mapping_bowtie.sh @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
#!/bin/bash
#
# Map a read file against the SpliceTrap transcript database (TXdb) with bowtie.
#
# Usage:
#   mapping_bowtie.sh <reads> <fasta|fastq> <db_prefix> <output_dir> <src_dir> <threads>
#
#   reads       read file to map; its basename names the work folder and result
#   fasta|fastq "fasta" adds -f to bowtie and counts 2 lines per read;
#               any other value is treated as fastq (4 lines per read)
#   db_prefix   database name; index is <src_dir>/../db/<db_prefix>/btw/TXdb
#   output_dir  existing folder that receives <basename of reads>.nomt
#   src_dir     folder holding batchqsub.pl, bowtie2eland.pl, mark.mt.4eland.pl.
#               It is used after changing directory and is embedded in the
#               generated job commands, so it should be an absolute path.
#   threads     value passed to bowtie -p
#
# The reads are split into 1,000,000-line chunks inside
# <output_dir>/<basename of reads>.result/cache. One command line per chunk is
# written to map.sh and handed to <src_dir>/batchqsub.pl. Chunks whose .nomt
# file does not contain exactly one line per read are written to mapcheck.sh
# and handed to batchqsub.pl again. Finally all .nomt files are concatenated
# into <output_dir>/<basename of reads>.nomt and the work folder is removed.
#
# NOTE(review): the check loop has no retry limit and the exit status of
# batchqsub.pl is ignored (its semantics are not visible from this script), so
# a chunk that can never be mapped completely keeps being resubmitted.

if (( $# < 6 )); then
  printf 'Usage: %s <reads> <fasta|fastq> <db_prefix> <output_dir> <src_dir> <threads>\n' \
    "${0##*/}" >&2
  exit 1
fi

InputFileName=$1
faorfq=$2
DatabasePrefix=$3
Outputfolder=$4
SrcFolder=$5
Threads=$6
DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/btw/TXdb'
TmpFolderName=$(basename -- "$InputFileName")
StartDir=$PWD

# Format-dependent settings, computed once instead of once per chunk.
# 'add' is always initialised so a value inherited from the environment
# cannot leak into the bowtie command line.
add=""
lines_per_read=4
if [[ "$faorfq" == "fasta" ]]; then
  add="-f"
  lines_per_read=2
fi

# Print a message on stderr and stop with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Like die, but first removes the work folder created by this run so that a
# later run is not refused because of a half-built folder.
abort_run() {
  cd "$Outputfolder" && rm -rf -- "${WorkDir:?}"
  die "$@"
}

# Print the number of lines in file $1 as a bare integer (no file name, no
# padding), or 0 when the file cannot be read.
count_lines() {
  local n
  n=$(wc -l < "$1") || n=0
  printf '%s\n' "$(( n ))"
}

# Print the job command line for chunk $1: bowtie, conversion to eland format,
# the mark.mt.4eland.pl step, and removal of the intermediate files.
# The text is kept exactly as batchqsub.pl has always received it.
emit_task() {
  local chunk=$1
  printf '%s\n' "bowtie -p $Threads -a -v 2 $DatabaseFolder $chunk $add >$chunk.btw; perl $SrcFolder/bowtie2eland.pl $chunk.btw $chunk $chunk.eland;rm $chunk.btw ;perl $SrcFolder/mark.mt.4eland.pl $chunk.eland >$chunk.nomt;rm $chunk.eland"
}

cd "$Outputfolder" \
  || die "MAPPING: !!!Error, cannot enter output folder $Outputfolder"
# From here on use the absolute path: the merged result is written while the
# current directory is the cache folder, where a relative path would point
# into the folder that is deleted at the end.
Outputfolder=$(pwd)
WorkDir=$Outputfolder/$TmpFolderName.result

#prepare the folder
if [[ -d "$WorkDir" ]]; then
  echo "MAPPING: !!!Error, there is already a folder named $TmpFolderName.result !"
  echo "MAPPING: !!!change the name of that folder first in case I erase them..."
  exit 1
fi
echo "MAPPING: Start mapping $InputFileName...Creating cache folder $TmpFolderName.result"
mkdir -p -- "$WorkDir/cache" \
  || die "MAPPING: !!!Error, cannot create $WorkDir/cache"
cd "$WorkDir/cache" \
  || abort_run "MAPPING: !!!Error, cannot enter $WorkDir/cache"

# A relative read path is tried from the cache folder first (the historical
# behaviour) and then from the directory the script was started in.
InputPath=$InputFileName
if [[ "$InputPath" != /* && ! -e "$InputPath" ]]; then
  InputPath=$StartDir/$InputFileName
fi

echo "MAPPING: Split to pieces ..."
# 1,000,000 is a multiple of 2 and 4, so no chunk boundary cuts through a
# fasta or fastq record.
split -l 1000000 -- "$InputPath" \
  || abort_run "MAPPING: !!!Error, cannot split $InputFileName"

for name in x*; do
  # Without any chunk the glob stays the literal "x*"; skip it.
  [[ -e "$name" ]] || continue
  emit_task "$name" >> map.sh
  printf '%s\n' "$name" >> checklist
done
[[ -s checklist ]] \
  || abort_run "MAPPING: !!!Error, no reads found in $InputFileName"

echo "MAPPING: submit scripts..."
perl "$SrcFolder/batchqsub.pl" map.sh

tasknum=$(count_lines map.sh)
#checking..

echo "MAPPING: mapping $InputFileName to TXdb done...start to check.."
while true; do
  rm -f mapcheck.sh
  while IFS= read -r name; do
    echo "MAPPING: checking $name...."
    readnum=$(( $(count_lines "$name") / lines_per_read ))
    if [[ -f "$name.nomt" ]]; then
      bowtienum=$(count_lines "$name.nomt")
    else
      bowtienum=0
    fi
    # A finished chunk has exactly one .nomt line per read.
    if (( bowtienum != readnum )); then
      emit_task "$name" >> mapcheck.sh
    fi
  done < checklist

  # No mapcheck.sh means every chunk passed the check.
  [[ -f mapcheck.sh ]] || break

  checktasknum=$(count_lines mapcheck.sh)
  if (( checktasknum == tasknum )); then
    echo "MAPPING: warning! none of the mapping tasks properly finished!"
  fi
  echo "MAPPING: resubmit TASKS...."
  perl "$SrcFolder/batchqsub.pl" mapcheck.sh
done

echo "MAPPING: Done.....merging files..."
# Keep the work folder when merging fails so the per-chunk results survive.
cat -- *.nomt > "$Outputfolder/$TmpFolderName.nomt" \
  || die "MAPPING: !!!Error, cannot write $Outputfolder/$TmpFolderName.nomt"
cd "$Outputfolder" || exit 1
rm -rf -- "${WorkDir:?}"