diff isoem_wrapper.sh @ 3:4c4d42f3e28e draft

Uploaded
author saharlcc
date Mon, 19 Sep 2016 22:01:22 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem_wrapper.sh	Mon Sep 19 22:01:22 2016 -0400
@@ -0,0 +1,256 @@
+#!/bin/bash
+
+
+echo $@
+echo pwd
+pwd
+isoEMDir=/home/projects/isoem2/isoem-workingversion
+tmapPath=/usr/local/bin
+bedtoolsPath=/usr/local/bin
+hisat2Path=/usr/local/bin
+tempDir=/tmp
+
+
+isoem2Path=${isoEMDir}/bin
+
+#exit;
+
+arg=($*)
+i=0
+for a in ${arg[*]}
+do
+((i++))
+	if [ "$a" == "--input1" ]; then 
+		RNAseq_1=${arg[i]}
+	fi
+
+        if [ "$a" == "--input2" ]; then 
+		RNAseq_2=${arg[i]}
+	fi
+		
+	if [ "$a" == "--GTF" ]; then 
+		GTF_file=${arg[i]}
+	fi
+
+        if [ "$a" == "--TMAP_INDEX" ]; then 
+		TMAP_INDEX_file=${arg[i]}
+	fi
+
+        if [ "$a" == "--HISAT2_INDEX" ]; then 
+		HISAT2_INDEX_file=${arg[i]}
+        fi
+
+	if [ "$a" == "--Cluster" ]; then 
+		Cluster_file=${arg[i]}
+	fi
+	
+	if [ "$a" == "-m" ]; then 
+		M=${arg[i]}
+	fi
+	
+	if [ "$a" == "-d" ]; then 
+		D=${arg[i]}
+	fi
+
+	if [ "$a" == "--out_gene_fpkm" ]; then 
+		out_gene_fpkm=${arg[i]}
+	fi
+
+        if [ "$a" == "--out_gene_tpm" ]; then 
+		out_gene_tpm=${arg[i]}
+	fi
+
+        if [ "$a" == "--out_iso_fpkm" ]; then 
+		out_iso_fpkm=${arg[i]}
+ 	fi
+
+        if [ "$a" == "--out_iso_tpm" ]; then 
+		out_iso_tpm=${arg[i]}
+	fi
+
+        if [ "$a" == "--out_bootstrap" ]; then 
+		out_bootstrap=${arg[i]}
+	fi
+
+        if [ "$a" == "--RNA_type" ]; then 
+		RNAseqType=${arg[i]}
+	fi
+
+        if [ "$a" == "--fastaFile" ]; then 
+		FastaFile=${arg[i]}
+	fi
+done
+
+
+
+if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
+then 
+        echo ${TMAP_INDEX_file}
+        echo Align the RNAseq_sample fastq to transcriptome using TMAP
+
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+#        fi
+ 
+        file_type=`echo $f | tail -c 6`
+
+        if [ "$file_type" == "fastq" ]; then
+
+            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
+        fi
+
+        file_type=`echo $f | tail -c 4`
+
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+        fi
+
+
+elif [ "${RNAseqType}" == "Illumina-paired-end" ]
+then        
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            gunzip -c ${RNAseq_2} > RNAseq_2.fastq
+#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+#        fi
+
+        file_type=`echo $f | tail -c 6`
+
+        if [ "$file_type" == "fastq" ]; then
+
+            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  ${RNAseq_1} -2  ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+
+        file_type=`echo $f | tail -c 4`
+
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
+           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+
+        
+else  
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+#        fi
+
+        file_type=`echo $f | tail -c 6`
+
+        if [ "$file_type" == "fastq" ]; then
+
+            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+
+fi
+
+
+echo Sorting
+
+LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
+
+
+if [ "${RNAseqType}" == "Illumina-paired-end" ]
+then 
+        echo IsoEM for RNAseq mapped to transcriptome
+        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
+
+else
+        echo IsoEM for RNAseq mapped to transcriptome
+        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam 
+fi
+
+echo Join estimates files with ci files
+
+echo ls
+#ls  ./aligned_reads_sorted/ -ltr
+
+join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 
+awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
+join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
+join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
+join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
+
+
+#echo Adding output directory to bootstap archive
+#
+#echo ls
+#ls  ./aligned_reads_sorted/ -ltr
+#
+#cd aligned_reads_sorted
+#echo ls
+#ls -ltrh
+#gunzip bootstrap.tar.gz
+#tar rf bootstrap.tar output
+#gzip bootstrap.tar
+mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
+
+
+#echo ls after gz
+#ls -ltr
+#
+#cd ..
+#pwd
+
+
+#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
+#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
+#gzip ./aligned_reads_sorted/bootstrap.tar 
+
+echo ls after gz
+ls -ltr
+
+#4. Copy output files
+#############################################################
+mv gene_fpkm ${out_gene_fpkm}
+mv gene_tpm ${out_gene_tpm} 
+mv iso_fpkm ${out_iso_fpkm}
+mv iso_tpm ${out_iso_tpm}
+
+#5.Remove files
+#############################################################
+rm RNAseq_transcriptome.sam
+rm aligned_reads_sorted.sam
+rm -rf aligned_reads_sorted
+
+echo "done"
+date
+
+
+
+