comparison isoem_wrapper.sh @ 3:4c4d42f3e28e draft

Uploaded
author saharlcc
date Mon, 19 Sep 2016 22:01:22 -0400
parents
children
comparison
equal deleted inserted replaced
2:7044191a603b 3:4c4d42f3e28e
1 #!/bin/bash
2
3
4 echo $@
5 echo pwd
6 pwd
7 isoEMDir=/home/projects/isoem2/isoem-workingversion
8 tmapPath=/usr/local/bin
9 bedtoolsPath=/usr/local/bin
10 hisat2Path=/usr/local/bin
11 tempDir=/tmp
12
13
14 isoem2Path=${isoEMDir}/bin
15
16 #exit;
17
18 arg=($*)
19 i=0
20 for a in ${arg[*]}
21 do
22 ((i++))
23 if [ "$a" == "--input1" ]; then
24 RNAseq_1=${arg[i]}
25 fi
26
27 if [ "$a" == "--input2" ]; then
28 RNAseq_2=${arg[i]}
29 fi
30
31 if [ "$a" == "--GTF" ]; then
32 GTF_file=${arg[i]}
33 fi
34
35 if [ "$a" == "--TMAP_INDEX" ]; then
36 TMAP_INDEX_file=${arg[i]}
37 fi
38
39 if [ "$a" == "--HISAT2_INDEX" ]; then
40 HISAT2_INDEX_file=${arg[i]}
41 fi
42
43 if [ "$a" == "--Cluster" ]; then
44 Cluster_file=${arg[i]}
45 fi
46
47 if [ "$a" == "-m" ]; then
48 M=${arg[i]}
49 fi
50
51 if [ "$a" == "-d" ]; then
52 D=${arg[i]}
53 fi
54
55 if [ "$a" == "--out_gene_fpkm" ]; then
56 out_gene_fpkm=${arg[i]}
57 fi
58
59 if [ "$a" == "--out_gene_tpm" ]; then
60 out_gene_tpm=${arg[i]}
61 fi
62
63 if [ "$a" == "--out_iso_fpkm" ]; then
64 out_iso_fpkm=${arg[i]}
65 fi
66
67 if [ "$a" == "--out_iso_tpm" ]; then
68 out_iso_tpm=${arg[i]}
69 fi
70
71 if [ "$a" == "--out_bootstrap" ]; then
72 out_bootstrap=${arg[i]}
73 fi
74
75 if [ "$a" == "--RNA_type" ]; then
76 RNAseqType=${arg[i]}
77 fi
78
79 if [ "$a" == "--fastaFile" ]; then
80 FastaFile=${arg[i]}
81 fi
82 done
83
84
85
86 if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
87 then
88 echo ${TMAP_INDEX_file}
89 echo Align the RNAseq_sample fastq to transcriptome using TMAP
90
91 f=$(basename ${RNAseq_1})
92 # file_type=`echo $f | tail -c 9`
93
94 # if [ "$file_type" == "fastq.gz" ]; then
95
96 # echo "Unzip fastq files"
97
98 # gunzip -c ${RNAseq_1} > RNAseq_1.fastq
99 # ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
100 # fi
101
102 file_type=`echo $f | tail -c 6`
103
104 if [ "$file_type" == "fastq" ]; then
105
106 ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
107 fi
108
109 file_type=`echo $f | tail -c 4`
110
111 if [ "$file_type" == "bam" ]; then
112
113 echo "Convert BAM to fastq"
114
115 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
116 ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
117 fi
118
119
120 elif [ "${RNAseqType}" == "Illumina-paired-end" ]
121 then
122 f=$(basename ${RNAseq_1})
123 # file_type=`echo $f | tail -c 9`
124
125 # if [ "$file_type" == "fastq.gz" ]; then
126
127 # echo "Unzip fastq files"
128
129 # gunzip -c ${RNAseq_1} > RNAseq_1.fastq
130 # gunzip -c ${RNAseq_2} > RNAseq_2.fastq
131 # /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
132 # fi
133
134 file_type=`echo $f | tail -c 6`
135
136 if [ "$file_type" == "fastq" ]; then
137
138 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
139 fi
140
141 file_type=`echo $f | tail -c 4`
142
143 if [ "$file_type" == "bam" ]; then
144
145 echo "Convert BAM to fastq"
146
147 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
148 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
149 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
150 fi
151
152
153 else
154 f=$(basename ${RNAseq_1})
155 # file_type=`echo $f | tail -c 9`
156
157 # if [ "$file_type" == "fastq.gz" ]; then
158
159 # echo "Unzip fastq files"
160
161 # gunzip -c ${RNAseq_1} > RNAseq_1.fastq
162 # /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
163 # fi
164
165 file_type=`echo $f | tail -c 6`
166
167 if [ "$file_type" == "fastq" ]; then
168
169 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
170 fi
171
172 if [ "$file_type" == "bam" ]; then
173
174 echo "Convert BAM to fastq"
175
176 ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
177 ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
178 fi
179
180 fi
181
182
183 echo Sorting
184
185 LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
186
187
188 if [ "${RNAseqType}" == "Illumina-paired-end" ]
189 then
190 echo IsoEM for RNAseq mapped to transcriptome
191 ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
192
193 else
194 echo IsoEM for RNAseq mapped to transcriptome
195 ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam
196 fi
197
198 echo Join estimates files with ci files
199
200 echo ls
201 #ls ./aligned_reads_sorted/ -ltr
202
203 join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333
204 awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
205 join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
206 join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
207 join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
208
209
210 #echo Adding output directory to bootstap archive
211 #
212 #echo ls
213 #ls ./aligned_reads_sorted/ -ltr
214 #
215 #cd aligned_reads_sorted
216 #echo ls
217 #ls -ltrh
218 #gunzip bootstrap.tar.gz
219 #tar rf bootstrap.tar output
220 #gzip bootstrap.tar
221 mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
222
223
224 #echo ls after gz
225 #ls -ltr
226 #
227 #cd ..
228 #pwd
229
230
231 #gunzip ./aligned_reads_sorted/bootstrap.tar.gz
232 #tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
233 #gzip ./aligned_reads_sorted/bootstrap.tar
234
235 echo ls after gz
236 ls -ltr
237
238 #4. Copy output files
239 #############################################################
240 mv gene_fpkm ${out_gene_fpkm}
241 mv gene_tpm ${out_gene_tpm}
242 mv iso_fpkm ${out_iso_fpkm}
243 mv iso_tpm ${out_iso_tpm}
244
245 #5.Remove files
246 #############################################################
247 rm RNAseq_transcriptome.sam
248 rm aligned_reads_sorted.sam
249 rm -rf aligned_reads_sorted
250
251 echo "done"
252 date
253
254
255
256