Mercurial > repos > saharlcc > isoem2_isode2
changeset 3:4c4d42f3e28e draft
Uploaded
author | saharlcc |
---|---|
date | Mon, 19 Sep 2016 22:01:22 -0400 (2016-09-20) |
parents | 7044191a603b |
children | 2e3d3451541b |
files | isoem_wrapper.sh |
diffstat | 1 files changed, 256 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem_wrapper.sh Mon Sep 19 22:01:22 2016 -0400 @@ -0,0 +1,256 @@ +#!/bin/bash + + +echo $@ +echo pwd +pwd +isoEMDir=/home/projects/isoem2/isoem-workingversion +tmapPath=/usr/local/bin +bedtoolsPath=/usr/local/bin +hisat2Path=/usr/local/bin +tempDir=/tmp + + +isoem2Path=${isoEMDir}/bin + +#exit; + +arg=($*) +i=0 +for a in ${arg[*]} +do +((i++)) + if [ "$a" == "--input1" ]; then + RNAseq_1=${arg[i]} + fi + + if [ "$a" == "--input2" ]; then + RNAseq_2=${arg[i]} + fi + + if [ "$a" == "--GTF" ]; then + GTF_file=${arg[i]} + fi + + if [ "$a" == "--TMAP_INDEX" ]; then + TMAP_INDEX_file=${arg[i]} + fi + + if [ "$a" == "--HISAT2_INDEX" ]; then + HISAT2_INDEX_file=${arg[i]} + fi + + if [ "$a" == "--Cluster" ]; then + Cluster_file=${arg[i]} + fi + + if [ "$a" == "-m" ]; then + M=${arg[i]} + fi + + if [ "$a" == "-d" ]; then + D=${arg[i]} + fi + + if [ "$a" == "--out_gene_fpkm" ]; then + out_gene_fpkm=${arg[i]} + fi + + if [ "$a" == "--out_gene_tpm" ]; then + out_gene_tpm=${arg[i]} + fi + + if [ "$a" == "--out_iso_fpkm" ]; then + out_iso_fpkm=${arg[i]} + fi + + if [ "$a" == "--out_iso_tpm" ]; then + out_iso_tpm=${arg[i]} + fi + + if [ "$a" == "--out_bootstrap" ]; then + out_bootstrap=${arg[i]} + fi + + if [ "$a" == "--RNA_type" ]; then + RNAseqType=${arg[i]} + fi + + if [ "$a" == "--fastaFile" ]; then + FastaFile=${arg[i]} + fi +done + + + +if [ "${RNAseqType}" == "Ion-Torrent-Proton" ] +then + echo ${TMAP_INDEX_file} + echo Align the RNAseq_sample fastq to transcriptome using TMAP + + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + +# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + + if [ "$file_type" == "fastq" ]; then + + ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam + fi + + file_type=`echo $f | tail -c 4` + + if [ "$file_type" == "bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq + ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam + fi + + +elif [ "${RNAseqType}" == "Illumina-paired-end" ] +then + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + +# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# gunzip -c ${RNAseq_2} > RNAseq_2.fastq +# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + + if [ "$file_type" == "fastq" ]; then + + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + + file_type=`echo $f | tail -c 4` + + if [ "$file_type" == "bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + + +else + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + +# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + + if [ "$file_type" == "fastq" ]; then + + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + + if [ "$file_type" == "bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + +fi + + +echo Sorting + +LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam + + +if [ "${RNAseqType}" == "Illumina-paired-end" ] +then + echo IsoEM for RNAseq mapped to transcriptome + ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam + +else + echo IsoEM for RNAseq mapped to transcriptome + ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam +fi + +echo Join estimates files with ci files + +echo ls +#ls ./aligned_reads_sorted/ -ltr + +join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 +awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm +join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm +join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm +join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm + + +#echo Adding output directory to bootstap archive +# +#echo ls +#ls ./aligned_reads_sorted/ -ltr +# +#cd aligned_reads_sorted +#echo ls +#ls -ltrh +#gunzip bootstrap.tar.gz +#tar rf bootstrap.tar output +#gzip bootstrap.tar +mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap} + + +#echo ls after gz +#ls -ltr +# +#cd .. +#pwd + + +#gunzip ./aligned_reads_sorted/bootstrap.tar.gz +#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output +#gzip ./aligned_reads_sorted/bootstrap.tar + +echo ls after gz +ls -ltr + +#4. Copy output files +############################################################# +mv gene_fpkm ${out_gene_fpkm} +mv gene_tpm ${out_gene_tpm} +mv iso_fpkm ${out_iso_fpkm} +mv iso_tpm ${out_iso_tpm} + +#5.Remove files +############################################################# +rm RNAseq_transcriptome.sam +rm aligned_reads_sorted.sam +rm -rf aligned_reads_sorted + +echo "done" +date + + + +