3
|
1 #!/bin/bash
|
|
2
|
|
3
|
|
4 echo $@
|
|
5 echo pwd
|
|
6 pwd
|
|
7 isoEMDir=/home/projects/isoem2/isoem-workingversion
|
|
8 tmapPath=/usr/local/bin
|
|
9 bedtoolsPath=/usr/local/bin
|
|
10 hisat2Path=/usr/local/bin
|
|
11 tempDir=/tmp
|
|
12
|
|
13
|
|
14 isoem2Path=${isoEMDir}/bin
|
|
15
|
|
16 #exit;
|
|
17
|
|
18 arg=($*)
|
|
19 i=0
|
|
20 for a in ${arg[*]}
|
|
21 do
|
|
22 ((i++))
|
|
23 if [ "$a" == "--input1" ]; then
|
|
24 RNAseq_1=${arg[i]}
|
|
25 fi
|
|
26
|
|
27 if [ "$a" == "--input2" ]; then
|
|
28 RNAseq_2=${arg[i]}
|
|
29 fi
|
|
30
|
|
31 if [ "$a" == "--GTF" ]; then
|
|
32 GTF_file=${arg[i]}
|
|
33 fi
|
|
34
|
|
35 if [ "$a" == "--TMAP_INDEX" ]; then
|
|
36 TMAP_INDEX_file=${arg[i]}
|
|
37 fi
|
|
38
|
|
39 if [ "$a" == "--HISAT2_INDEX" ]; then
|
|
40 HISAT2_INDEX_file=${arg[i]}
|
|
41 fi
|
|
42
|
|
43 if [ "$a" == "--Cluster" ]; then
|
|
44 Cluster_file=${arg[i]}
|
|
45 fi
|
|
46
|
|
47 if [ "$a" == "-m" ]; then
|
|
48 M=${arg[i]}
|
|
49 fi
|
|
50
|
|
51 if [ "$a" == "-d" ]; then
|
|
52 D=${arg[i]}
|
|
53 fi
|
|
54
|
|
55 if [ "$a" == "--out_gene_fpkm" ]; then
|
|
56 out_gene_fpkm=${arg[i]}
|
|
57 fi
|
|
58
|
|
59 if [ "$a" == "--out_gene_tpm" ]; then
|
|
60 out_gene_tpm=${arg[i]}
|
|
61 fi
|
|
62
|
|
63 if [ "$a" == "--out_iso_fpkm" ]; then
|
|
64 out_iso_fpkm=${arg[i]}
|
|
65 fi
|
|
66
|
|
67 if [ "$a" == "--out_iso_tpm" ]; then
|
|
68 out_iso_tpm=${arg[i]}
|
|
69 fi
|
|
70
|
|
71 if [ "$a" == "--out_bootstrap" ]; then
|
|
72 out_bootstrap=${arg[i]}
|
|
73 fi
|
|
74
|
|
75 if [ "$a" == "--RNA_type" ]; then
|
|
76 RNAseqType=${arg[i]}
|
|
77 fi
|
|
78
|
|
79 if [ "$a" == "--fastaFile" ]; then
|
|
80 FastaFile=${arg[i]}
|
|
81 fi
|
|
82 done
|
|
83
|
|
84
|
|
85
|
|
86 if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
|
|
87 then
|
|
88 echo ${TMAP_INDEX_file}
|
|
89 echo Align the RNAseq_sample fastq to transcriptome using TMAP
|
|
90
|
|
91 f=$(basename ${RNAseq_1})
|
|
92 # file_type=`echo $f | tail -c 9`
|
|
93
|
|
94 # if [ "$file_type" == "fastq.gz" ]; then
|
|
95
|
|
96 # echo "Unzip fastq files"
|
|
97
|
|
98 # gunzip -c ${RNAseq_1} > RNAseq_1.fastq
|
|
99 # ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
|
|
100 # fi
|
|
101
|
|
102 file_type=`echo $f | tail -c 6`
|
|
103
|
|
104 if [ "$file_type" == "fastq" ]; then
|
|
105
|
|
106 ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
|
|
107 fi
|
|
108
|
|
109 file_type=`echo $f | tail -c 4`
|
|
110
|
|
111 if [ "$file_type" == "bam" ]; then
|
|
112
|
|
113 echo "Convert BAM to fastq"
|
|
114
|
|
115 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
|
|
116 ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
|
|
117 fi
|
|
118
|
|
119
|
|
120 elif [ "${RNAseqType}" == "Illumina-paired-end" ]
|
|
121 then
|
|
122 f=$(basename ${RNAseq_1})
|
|
123 # file_type=`echo $f | tail -c 9`
|
|
124
|
|
125 # if [ "$file_type" == "fastq.gz" ]; then
|
|
126
|
|
127 # echo "Unzip fastq files"
|
|
128
|
|
129 # gunzip -c ${RNAseq_1} > RNAseq_1.fastq
|
|
130 # gunzip -c ${RNAseq_2} > RNAseq_2.fastq
|
|
131 # /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
|
|
132 # fi
|
|
133
|
|
134 file_type=`echo $f | tail -c 6`
|
|
135
|
|
136 if [ "$file_type" == "fastq" ]; then
|
|
137
|
|
138 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
|
|
139 fi
|
|
140
|
|
141 file_type=`echo $f | tail -c 4`
|
|
142
|
|
143 if [ "$file_type" == "bam" ]; then
|
|
144
|
|
145 echo "Convert BAM to fastq"
|
|
146
|
|
147 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
|
|
148 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
|
|
149 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
|
|
150 fi
|
|
151
|
|
152
|
|
153 else
|
|
154 f=$(basename ${RNAseq_1})
|
|
155 # file_type=`echo $f | tail -c 9`
|
|
156
|
|
157 # if [ "$file_type" == "fastq.gz" ]; then
|
|
158
|
|
159 # echo "Unzip fastq files"
|
|
160
|
|
161 # gunzip -c ${RNAseq_1} > RNAseq_1.fastq
|
|
162 # /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
|
|
163 # fi
|
|
164
|
|
165 file_type=`echo $f | tail -c 6`
|
|
166
|
|
167 if [ "$file_type" == "fastq" ]; then
|
|
168
|
|
169 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
|
|
170 fi
|
|
171
|
|
172 if [ "$file_type" == "bam" ]; then
|
|
173
|
|
174 echo "Convert BAM to fastq"
|
|
175
|
|
176 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
|
|
177 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
|
|
178 fi
|
|
179
|
|
180 fi
|
|
181
|
|
182
|
|
183 echo Sorting
|
|
184
|
|
185 LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
|
|
186
|
|
187
|
|
188 if [ "${RNAseqType}" == "Illumina-paired-end" ]
|
|
189 then
|
|
190 echo IsoEM for RNAseq mapped to transcriptome
|
|
191 ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
|
|
192
|
|
193 else
|
|
194 echo IsoEM for RNAseq mapped to transcriptome
|
|
195 ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam
|
|
196 fi
|
|
197
|
|
198 echo Join estimates files with ci files
|
|
199
|
|
200 echo ls
|
|
201 #ls ./aligned_reads_sorted/ -ltr
|
|
202
|
|
203 join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333
|
|
204 awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
|
|
205 join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
|
|
206 join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
|
|
207 join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
|
|
208
|
|
209
|
|
210 #echo Adding output directory to bootstap archive
|
|
211 #
|
|
212 #echo ls
|
|
213 #ls ./aligned_reads_sorted/ -ltr
|
|
214 #
|
|
215 #cd aligned_reads_sorted
|
|
216 #echo ls
|
|
217 #ls -ltrh
|
|
218 #gunzip bootstrap.tar.gz
|
|
219 #tar rf bootstrap.tar output
|
|
220 #gzip bootstrap.tar
|
|
221 mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
|
|
222
|
|
223
|
|
224 #echo ls after gz
|
|
225 #ls -ltr
|
|
226 #
|
|
227 #cd ..
|
|
228 #pwd
|
|
229
|
|
230
|
|
231 #gunzip ./aligned_reads_sorted/bootstrap.tar.gz
|
|
232 #tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
|
|
233 #gzip ./aligned_reads_sorted/bootstrap.tar
|
|
234
|
|
235 echo ls after gz
|
|
236 ls -ltr
|
|
237
|
|
238 #4. Copy output files
|
|
239 #############################################################
|
|
240 mv gene_fpkm ${out_gene_fpkm}
|
|
241 mv gene_tpm ${out_gene_tpm}
|
|
242 mv iso_fpkm ${out_iso_fpkm}
|
|
243 mv iso_tpm ${out_iso_tpm}
|
|
244
|
|
245 #5.Remove files
|
|
246 #############################################################
|
|
247 rm RNAseq_transcriptome.sam
|
|
248 rm aligned_reads_sorted.sam
|
|
249 rm -rf aligned_reads_sorted
|
|
250
|
|
251 echo "done"
|
|
252 date
|
|
253
|
|
254
|
|
255
|
|
256
|