view isoem_wrapper.sh @ 5:240c34061675 draft

Uploaded
author saharlcc
date Mon, 19 Sep 2016 22:06:23 -0400
parents 4c4d42f3e28e
children
line wrap: on
line source

#!/bin/bash


echo $@
echo pwd
pwd
isoEMDir=/home/projects/isoem2/isoem-workingversion
tmapPath=/usr/local/bin
bedtoolsPath=/usr/local/bin
hisat2Path=/usr/local/bin
tempDir=/tmp


isoem2Path=${isoEMDir}/bin

#exit;

arg=($*)
i=0
for a in ${arg[*]}
do
((i++))
	if [ "$a" == "--input1" ]; then 
		RNAseq_1=${arg[i]}
	fi

        if [ "$a" == "--input2" ]; then 
		RNAseq_2=${arg[i]}
	fi
		
	if [ "$a" == "--GTF" ]; then 
		GTF_file=${arg[i]}
	fi

        if [ "$a" == "--TMAP_INDEX" ]; then 
		TMAP_INDEX_file=${arg[i]}
	fi

        if [ "$a" == "--HISAT2_INDEX" ]; then 
		HISAT2_INDEX_file=${arg[i]}
        fi

	if [ "$a" == "--Cluster" ]; then 
		Cluster_file=${arg[i]}
	fi
	
	if [ "$a" == "-m" ]; then 
		M=${arg[i]}
	fi
	
	if [ "$a" == "-d" ]; then 
		D=${arg[i]}
	fi

	if [ "$a" == "--out_gene_fpkm" ]; then 
		out_gene_fpkm=${arg[i]}
	fi

        if [ "$a" == "--out_gene_tpm" ]; then 
		out_gene_tpm=${arg[i]}
	fi

        if [ "$a" == "--out_iso_fpkm" ]; then 
		out_iso_fpkm=${arg[i]}
 	fi

        if [ "$a" == "--out_iso_tpm" ]; then 
		out_iso_tpm=${arg[i]}
	fi

        if [ "$a" == "--out_bootstrap" ]; then 
		out_bootstrap=${arg[i]}
	fi

        if [ "$a" == "--RNA_type" ]; then 
		RNAseqType=${arg[i]}
	fi

        if [ "$a" == "--fastaFile" ]; then 
		FastaFile=${arg[i]}
	fi
done



if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
then 
        echo ${TMAP_INDEX_file}
        echo Align the RNAseq_sample fastq to transcriptome using TMAP

        f=$(basename ${RNAseq_1})
#        file_type=`echo $f | tail -c 9`

#        if [ "$file_type" == "fastq.gz" ]; then 

#            echo "Unzip fastq files"

#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
#        fi
 
        file_type=`echo $f | tail -c 6`

        if [ "$file_type" == "fastq" ]; then

            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
        fi

        file_type=`echo $f | tail -c 4`

        if [ "$file_type" == "bam" ]; then 

           echo "Convert BAM to fastq"

           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
        fi


elif [ "${RNAseqType}" == "Illumina-paired-end" ]
then        
        f=$(basename ${RNAseq_1})
#        file_type=`echo $f | tail -c 9`

#        if [ "$file_type" == "fastq.gz" ]; then 

#            echo "Unzip fastq files"

#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
#            gunzip -c ${RNAseq_2} > RNAseq_2.fastq
#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
#        fi

        file_type=`echo $f | tail -c 6`

        if [ "$file_type" == "fastq" ]; then

            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  ${RNAseq_1} -2  ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
        fi

        file_type=`echo $f | tail -c 4`

        if [ "$file_type" == "bam" ]; then 

           echo "Convert BAM to fastq"

           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
        fi

        
else  
        f=$(basename ${RNAseq_1})
#        file_type=`echo $f | tail -c 9`

#        if [ "$file_type" == "fastq.gz" ]; then 

#            echo "Unzip fastq files"

#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
#        fi

        file_type=`echo $f | tail -c 6`

        if [ "$file_type" == "fastq" ]; then

            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
        fi

        if [ "$file_type" == "bam" ]; then 

           echo "Convert BAM to fastq"

           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
        fi

fi


echo Sorting

LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam


if [ "${RNAseqType}" == "Illumina-paired-end" ]
then 
        echo IsoEM for RNAseq mapped to transcriptome
        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam

else
        echo IsoEM for RNAseq mapped to transcriptome
        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam 
fi

echo Join estimates files with ci files

echo ls
#ls  ./aligned_reads_sorted/ -ltr

join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 
awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm


#echo Adding output directory to bootstap archive
#
#echo ls
#ls  ./aligned_reads_sorted/ -ltr
#
#cd aligned_reads_sorted
#echo ls
#ls -ltrh
#gunzip bootstrap.tar.gz
#tar rf bootstrap.tar output
#gzip bootstrap.tar
mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}


#echo ls after gz
#ls -ltr
#
#cd ..
#pwd


#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
#gzip ./aligned_reads_sorted/bootstrap.tar 

echo ls after gz
ls -ltr

#4. Copy output files
#############################################################
mv gene_fpkm ${out_gene_fpkm}
mv gene_tpm ${out_gene_tpm} 
mv iso_fpkm ${out_iso_fpkm}
mv iso_tpm ${out_iso_tpm}

#5.Remove files
#############################################################
rm RNAseq_transcriptome.sam
rm aligned_reads_sorted.sam
rm -rf aligned_reads_sorted

echo "done"
date