changeset 18:c6d2dbdf0a4d draft

Uploaded
author saharlcc
date Fri, 26 May 2017 08:13:06 -0400
parents 40e893e4a37f
children 532b1e5feb8f
files isoem2_isode2.zip isoem2_isode2/IsoEM.loc isoem2_isode2/IsoEM.loc.sample isoem2_isode2/README.txt isoem2_isode2/isoDE.xml isoem2_isode2/isoem_wrapper.sh isoem2_isode2/isoem_wrapper.xml isoem2_isode2/tool_data_table_conf.xml isoem2_isode2/tool_data_table_conf.xml.sample
diffstat 9 files changed, 120 insertions(+), 274 deletions(-) [+]
line wrap: on
line diff
Binary file isoem2_isode2.zip has changed
--- a/isoem2_isode2/IsoEM.loc	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/IsoEM.loc	Fri May 26 08:13:06 2017 -0400
@@ -3,10 +3,10 @@
 #NOTE: All entries in this file MUST be tab-delimited
 
 #Every entry has the following 5 fields:
-#Reference name	<GTF>	<TMAP_INDEX> <HISAT2_INDEX> <Cluster file>
-
+#Referencename	<GTF>	<HISAT2_INDEX> <Cluster file>
 
 
-mm10-RefSeq77	/import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-hg38-RefSeq77	/import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-	
\ No newline at end of file
+hg38_RefSeq	/galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF	/galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/hg38-RefSeq/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+mm10_RefSeq	/galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF	/galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/mm10-RefSeq/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+
+	
--- a/isoem2_isode2/IsoEM.loc.sample	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/IsoEM.loc.sample	Fri May 26 08:13:06 2017 -0400
@@ -3,10 +3,10 @@
 #NOTE: All entries in this file MUST be tab-delimited
 
 #Every entry has the following 5 fields:
-#Reference name	<GTF>	<TMAP_INDEX> <HISAT2_INDEX> <Cluster file>
-
+#Referencename	<GTF>	<HISAT2_INDEX> <Cluster file>
 
 
-mm10-RefSeq77	/import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-hg38-RefSeq77	/import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-	
\ No newline at end of file
+hg38_RefSeq	/galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF	/galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/hg38-RefSeq/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+mm10_RefSeq	/galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF	/galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/mm10-RefSeq/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+
+	
--- a/isoem2_isode2/README.txt	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/README.txt	Fri May 26 08:13:06 2017 -0400
@@ -16,9 +16,11 @@
 
 4) Edit isoem_wrapper.sh and IsoDE2.sh. Change tool paths to where they are installed on your system 
 
-5) build hisat2 and/or tmap index, depending on the type of your RNA-Seq data, for the reference you will use for mapping
+5) Build a hisat2 index for the reference you will use for mapping
 
 6) Edit the .loc file with paths to your reference files (GTF, transcript-to-gene cluster, and mapper indices), and copy it under the tool-data directory in Galaxy
 Note: If you need only one of the two mappers (say hisat2), you can replace the path to the other mapper index with a dummy path
 
 7) copy the entry in the tool_data_table_conf.xml file provided in this repository to the tool_data_table_conf.xml in your Galaxy config directory
+
+
--- a/isoem2_isode2/isoDE.xml	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/isoDE.xml	Fri May 26 08:13:06 2017 -0400
@@ -19,7 +19,7 @@
  </command>
 
   <inputs>
-    <param name="sampleName1"  size="10" type="text"  label="Name for Condition 1" value="Condition1"/>
+    <param name="sampleName1"  size="10" type="text"  label="Name for Condition 1" value="Condition1" help="Output files label"/>
 
     <param name="condition1" type="data" label="Select data for Condition 1" format="gz" help="Condition 1 isoEM2 compressed output file"/>
 <!--
@@ -29,7 +29,7 @@
       <param name="c1Rep" label="Add replicate" type="data" format="gz" data_ref="condtion1" />
     </repeat>
 
-    <param name="sampleName2"  size="10" type="text"  label="Name for Condition 2" value="Condition2"/>
+    <param name="sampleName2"  size="10" type="text"  label="Name for Condition 2" value="Condition2" help="Output files label"/>
 
     <param format="gz" name="condition2" type="data" label="Select data for Condition 2"  help="Condition 2 isoEM2 compressed output file"/>
 <!--
--- a/isoem2_isode2/isoem_wrapper.sh	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/isoem_wrapper.sh	Fri May 26 08:13:06 2017 -0400
@@ -1,6 +1,5 @@
 #!/bin/bash
 
-
 echo $@
 echo pwd
 pwd
@@ -9,347 +8,189 @@
 tmapPath=${toolpath}/bin
 prinseqPath=${toolpath}/bin
 bedtoolsPath=${toolpath}/bin
-	
-
-#tmapPath=/usr/local/bin
-#bedtoolsPath=/usr/bin
-#hisat2Path=/usr/local/bin
-
 tempDir=/galaxy-prod/tmp
-
-
 isoem2Path=${toolpath}/bin/isoem2/bin
 
-#exit;
 
 arg=($*)
 i=0
 for a in ${arg[*]}
 do
 ((i++))
-	if [ "$a" == "--input1" ]; then 
+	if [ "$a" == "--input1" ]; then
 		RNAseq_1=${arg[i]}
 	fi
 
-        if [ "$a" == "--input2" ]; then 
+        if [ "$a" == "--input2" ]; then
 		RNAseq_2=${arg[i]}
 	fi
-		
-	if [ "$a" == "--GTF" ]; then 
+
+	if [ "$a" == "--GTF" ]; then
 		GTF_file=${arg[i]}
 	fi
 
-        if [ "$a" == "--TMAP_INDEX" ]; then 
+        if [ "$a" == "--TMAP_INDEX" ]; then
 		TMAP_INDEX_file=${arg[i]}
 	fi
 
-        if [ "$a" == "--HISAT2_INDEX" ]; then 
+        if [ "$a" == "--HISAT2_INDEX" ]; then
 		HISAT2_INDEX_file=${arg[i]}
         fi
 
-	if [ "$a" == "--Cluster" ]; then 
+	if [ "$a" == "--Cluster" ]; then
 		Cluster_file=${arg[i]}
 	fi
-	
-	if [ "$a" == "-m" ]; then 
+
+	if [ "$a" == "-m" ]; then
 		M=${arg[i]}
 	fi
-	
-	if [ "$a" == "-d" ]; then 
+
+	if [ "$a" == "-d" ]; then
 		D=${arg[i]}
 	fi
 
-	if [ "$a" == "--out_gene_fpkm" ]; then 
+	if [ "$a" == "--out_gene_fpkm" ]; then
 		out_gene_fpkm=${arg[i]}
 	fi
 
-        if [ "$a" == "--out_gene_tpm" ]; then 
+        if [ "$a" == "--out_gene_tpm" ]; then
 		out_gene_tpm=${arg[i]}
 	fi
 
-        if [ "$a" == "--out_iso_fpkm" ]; then 
+        if [ "$a" == "--out_iso_fpkm" ]; then
 		out_iso_fpkm=${arg[i]}
  	fi
 
-        if [ "$a" == "--out_iso_tpm" ]; then 
+        if [ "$a" == "--out_iso_tpm" ]; then
 		out_iso_tpm=${arg[i]}
 	fi
 
-        if [ "$a" == "--out_bootstrap" ]; then 
+        if [ "$a" == "--out_bootstrap" ]; then
 		out_bootstrap=${arg[i]}
 	fi
 
-        if [ "$a" == "--RNA_type" ]; then 
+        if [ "$a" == "--RNA_type" ]; then
 		RNAseqType=${arg[i]}
 	fi
 
-        if [ "$a" == "--fastaFile" ]; then 
+        if [ "$a" == "--fastaFile" ]; then
 		FastaFile=${arg[i]}
+		fasta=yes
 	fi
-
-	if [ "$a" == "--MinReadLength" ]; then 
+ 
+	if [ "$a" == "--MinReadLength" ]; then
 		MinReadLengthNum=${arg[i]}
 	fi
 
 done
 
 
-
-
-if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
-then 
-        echo ${TMAP_INDEX_file}
-        echo Align the RNAseq_sample fastq to transcriptome using TMAP
-
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
-
-#            echo "Unzip fastq files"
-
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
-#        fi
- 
-        file_type=`echo $f | tail -c 6`
-	echo file type $file_type
-
-        if [ "$file_type" == "fastq" ]; then
-
-            #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
-	    cp ${RNAseq_1} RNAseq_1.fastq
-        fi
-
-        file_type=`echo $f | tail -c 4`
-
-        if [ "$file_type" == "bam" ]; then 
-
-           echo "Convert BAM to fastq"
-
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-
+#Sahar 5/20/2017 adding support for user uploaded reference transcriptome
+if [ "${fasta}" == "yes" ]
+then
+    echo building reference index and GTF
+    ln -s ${FastaFile} reference.fasta
+    ${hisat2Path}/hisat2-build reference.fasta reference.fasta
+    HISAT2_INDEX_file=reference.fasta
+    ${isoem2Path}/fastaToGTF reference.fasta
+    ls -ltrh
+    GTF_file=reference.fasta.GTF
+    awk '{print substr($12,2,length($12)-3)}' reference.fasta.GTF | sort | uniq | awk '{print $1 "\t" $1}' > cluster.txt
+    Cluster_file=cluster.txt
+    ls ${Cluster_file} -ltr
+    ls ${GTF_file} -ltr
+    ls ${HISAT2_INDEX_file} -ltr
+fi
 
-#           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
-        fi
-
-	   echo @@@@@@ Number of raw reads in  Proton RNA tumor @@@@@@@
-	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
-
-	   echo filter the RNA fastq QC less than 20 and duplicates 
-	   perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_len ${MinReadLengthNum} -min_qual_mean 20  
- 
-#	   rm RNA_1.fastq
-
-	   echo @@@@@@ Number of cleaned reads in  Proton RNA tumor @@@@@@@
-	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
-
-        ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam
-
+if [ "${RNAseqType}" == "Illumina-paired-end" ]
+then
+        f=$(basename ${RNAseq_1})
 
-elif [ "${RNAseqType}" == "Ion-Torrent-Proton-mm9" ]
-then 
-        echo ${TMAP_INDEX_file}
-        echo Align the RNAseq_sample fastq to transcriptome using TMAP
-
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
-
-#            echo "Unzip fastq files"
-
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
-#        fi
- 
         file_type=`echo $f | tail -c 6`
-	echo file type $file_type
-
         if [ "$file_type" == "fastq" ]; then
-
-            #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
-	    cp ${RNAseq_1} RNAseq_1.fastq
+	   ln ${RNAseq_1}  RNAseq_1.fastq
+	   ln ${RNAseq_2}  RNAseq_2.fastq
         fi
 
         file_type=`echo $f | tail -c 4`
-
-        if [ "$file_type" == "bam" ]; then 
-
+        if [ "$file_type" == "bam" ]; then
            echo "Convert BAM to fastq"
-
            ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-
-
-#           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
         fi
 
-	   echo @@@@@@ Number of raw reads in  Proton RNA tumor @@@@@@@
-	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+	echo @@@@@@ Number of raw Ilumina RNA-Seq reads @@@@@@@
+	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq
 
-	   echo filter the RNA fastq QC less than 20 and duplicates 
-	   perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum}
- 
-#	   rm RNA_1.fastq
-
-	   echo @@@@@@ Number of cleaned reads in  Proton RNA tumor @@@@@@@
-	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
-
-        ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam
-
+	echo filter the RNA fastq QC less than 20 and duplicates
+	perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum}
 
-elif [ "${RNAseqType}" == "Illumina-paired-end" ]
-then        
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
+	echo @@@@@@ Number of cleaned Ilumina RNA-Seq reads @@@@@@@
+	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq
 
-#            echo "Unzip fastq files"
+        ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNA_QC_dup_1.fastq -2  RNA_QC_dup_2.fastq \
+           --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 16  > RNAseq_transcriptome.sam
 
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            gunzip -c ${RNAseq_2} > RNAseq_2.fastq
-#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-#        fi
+        rm RNA*.fastq
+
+else # [ "${RNAseqType}" == "Illumina-single-end"  || "${RNAseqType}" == "Ion-Torrent-Proton" ] Sahar 05/05/2017 hisat2 is used for Proton - exact code as Illumina single
+        f=$(basename ${RNAseq_1})
 
         file_type=`echo $f | tail -c 6`
-
         if [ "$file_type" == "fastq" ]; then
-
-
-#            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  ${RNAseq_1} -2  ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-	     cp ${RNAseq_1}  RNAseq_1.fastq
-	     cp ${RNAseq_2}  RNAseq_2.fastq
-
-
+	    ln ${RNAseq_1} RNAseq_1.fastq
         fi
 
         file_type=`echo $f | tail -c 4`
-
-        if [ "$file_type" == "bam" ]; then 
-
+        if [ "$file_type" == "bam" ]; then
            echo "Convert BAM to fastq"
-
            ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
-#           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
         fi
 
-
-	echo @@@@@@ Number of raw reads not paired in  Ilumina RNA tumor @@@@@@@
-	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq 
-
-
-	echo filter the RNA fastq QC less than 20 and duplicates 
-	perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum} 
-
-	echo @@@@@@ Number of cleaned reads not paired in  Ilumina RNA tumor @@@@@@@
-	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq
+	echo @@@@@@ Number of raw Ilumina RNA-Seq reads @@@@@@@
+	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
 
-        ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNA_QC_dup_1.fastq -2  RNA_QC_dup_2.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8  > RNAseq_transcriptome.sam
-        
-else  
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
-
-#            echo "Unzip fastq files"
-
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8  > RNAseq_transcriptome.sam
-#        fi
+	echo filter the RNA fastq QC less than 20 and duplicates
+	perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum}
 
-        file_type=`echo $f | tail -c 6`
-
-        if [ "$file_type" == "fastq" ]; then
-	    cp ${RNAseq_1} RNAseq_1.fastq
-
-            #${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-        fi
-
-        if [ "$file_type" == "bam" ]; then 
-
-           echo "Convert BAM to fastq"
+	echo @@@@@@ Number of cleaned Ilumina RNA-Seq reads @@@@@@@
+	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
 
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-#           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-        fi
-
-	   echo @@@@@@ Number of raw reads in  Proton RNA tumor @@@@@@@
-	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+        ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNA_QC_dup.fastq \
+           --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 16  > RNAseq_transcriptome.sam
 
-	   echo filter the RNA fastq QC less than 20 and duplicates 
-	   perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum}
- 
-#	   rm RNA_1.fastq
-
-	   echo @@@@@@ Number of cleaned reads in  Proton RNA tumor @@@@@@@
-	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
-           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNA_QC_dup.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8  > RNAseq_transcriptome.sam
+        rm RNA*.fastq
 
 fi
 
-
 echo Sorting
-
 LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
 
 
 if [ "${RNAseqType}" == "Illumina-paired-end" ]
-then 
-        echo IsoEM for RNAseq mapped to transcriptome
-        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
+then
+        echo IsoEM for paired RNAseq mapped to transcriptome
+        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 --polyA 200 -a aligned_reads_sorted.sam
 
 else
-        echo IsoEM for RNAseq mapped to transcriptome
-        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam 
+        echo IsoEM for single-end RNAseq mapped to transcriptome
+        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} --polyA 200 -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam
 fi
 
 echo Join estimates files with ci files
 
-echo ls
-#ls  ./aligned_reads_sorted/ -ltr
-
 join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 
 awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
 join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
 join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
 join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
 
-
-#echo Adding output directory to bootstap archive
-#
-#echo ls
-#ls  ./aligned_reads_sorted/ -ltr
-#
-#cd aligned_reads_sorted
-#echo ls
-#ls -ltrh
-#gunzip bootstrap.tar.gz
-#tar rf bootstrap.tar output
-#gzip bootstrap.tar
 mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
 
-
-#echo ls after gz
-#ls -ltr
-#
-#cd ..
-#pwd
-
-
-#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
-#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
-#gzip ./aligned_reads_sorted/bootstrap.tar 
-
-echo ls after gz
-ls -ltr
-
 #4. Copy output files
 #############################################################
 mv gene_fpkm ${out_gene_fpkm}
-mv gene_tpm ${out_gene_tpm} 
+mv gene_tpm ${out_gene_tpm}
 mv iso_fpkm ${out_iso_fpkm}
 mv iso_tpm ${out_iso_tpm}
 
@@ -364,4 +205,3 @@
 
 
 
-
--- a/isoem2_isode2/isoem_wrapper.xml	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/isoem_wrapper.xml	Fri May 26 08:13:06 2017 -0400
@@ -16,7 +16,7 @@
 	    --MinReadLength $MinReadLength
 
             ## Handle reference file .
-            #if $referenceSource.CCDSsource == "history":
+            #if $referenceSource.TranscriptomeSource == "history":
                 --fastaFile $referenceSource.fastaFile
             #else:
                 --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster
@@ -43,7 +43,7 @@
 	    <param name="sampleName"  size="10" type="text"  label="Sample name" value="Sample" help="Output files label"/>
 
         <conditional name="referenceSource">
-          <param name="CCDSsource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" help="Built-ins were indexed using default options">
+          <param name="TranscriptomeSource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" help="Built-ins were indexed using default options">
             <option value="indexed">Use a built-in reference</option>
             <option value="history">Use reference from the history</option>
           </param>
@@ -53,7 +53,7 @@
             </param>
           </when>
           <when value="history">
-            <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select CCDS fasta file from your history" />
+            <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select transcriptome fasta file from your history" />
           </when>  <!-- history -->
         </conditional>  <!-- referenceSource -->
         <conditional name="Data">
@@ -69,16 +69,16 @@
 	        <option value="Illumina-single-end">Illumina single-end</option>
             </param>  <!-- RNAseqType -->
             <when value="Illumina-paired-end">
-              <param name="input1" type="data" label="RNA-Seq file1, fastq or bam format" />
-              <param name="input2" type="data" label="RNA-Seq file2, fastq or bam format" /> 
+              <param name="input1" type="data" format="fastq" label="RNA-Seq file1, fastq format" />
+              <param name="input2" type="data" format="fastq" label="RNA-Seq file2, fastq format" /> 
 	    </when>
             <when value="Ion-Torrent-Proton">
-	      <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
+	      <param name="input1" type="data" format="fastq" label="RNA-Seq file, fastq format" />
               <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
 	      <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> 
             </when>
 	    <when value="Illumina-single-end">
-	      <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
+	      <param name="input1" type="data" format="fastq" label="RNA-Seq file, fastq format" />
               <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
 	      <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> 
             </when>
@@ -111,7 +111,7 @@
 
 **Input Format**
 
-* The IsoEM2 tool can process RNA-seq reads generated by both Ion Torrent and Illumina platforms. RNA-Seq reads must be provided in fastq, fastq.gz, or bam formats. 
+* The IsoEM2 tool can process RNA-seq reads generated by both Ion Torrent and Illumina platforms. RNA-Seq reads must be provided in fastq format. 
 
 **Output Format**
 
@@ -127,6 +127,10 @@
 
 -----
 
+**BUILT-IN REFERENCES**
+
+* All reference files used in this pipeline can be found at http://dna.engr.uconn.edu/tmp/galaxy/tool-data/IsoEM.loc
+*
 	
 </help>
 
--- a/isoem2_isode2/tool_data_table_conf.xml	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/tool_data_table_conf.xml	Fri May 26 08:13:06 2017 -0400
@@ -1,10 +1,10 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
-<tables>
-    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
-    <table name="IsoEM" comment_char="#">
-        <columns>value, GTF, TMAP_INDEX, HISAT2_INDEX, Cluster </columns>
-        <file path="tool-data/IsoEM.loc" />
-    </table>
-
-</tables>
-
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
+    <table name="IsoEM" comment_char="#">
+        <columns>value, GTF, HISAT2_INDEX, Cluster </columns>
+        <file path="tool-data/IsoEM.loc" />
+    </table>
+
+</tables>
+
--- a/isoem2_isode2/tool_data_table_conf.xml.sample	Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/tool_data_table_conf.xml.sample	Fri May 26 08:13:06 2017 -0400
@@ -1,10 +1,10 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
-<tables>
-    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
-    <table name="IsoEM" comment_char="#">
-        <columns>value, GTF, TMAP_INDEX, HISAT2_INDEX, Cluster </columns>
-        <file path="tool-data/IsoEM.loc" />
-    </table>
-
-</tables>
-
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
+    <table name="IsoEM" comment_char="#">
+        <columns>value, GTF, HISAT2_INDEX, Cluster </columns>
+        <file path="tool-data/IsoEM.loc" />
+    </table>
+
+</tables>
+