changeset 10:78d03bf22a1f draft

- Add prinseq command to filter RNA-Seq data - Fix in interpreting p-value when replicates are used
author saharlcc
date Thu, 16 Mar 2017 13:44:03 -0400
parents ce0a125b3cd1
children 630d5a01ef13
files FC_Filter_IsoDE_wrapper.sh FC_Filter_IsoDE_wrapper.xml IsoEM.loc IsoEM.loc.sample isoDE.xml isoDE2.sh isoem2_isode2/FC_Filter_IsoDE_wrapper.sh isoem2_isode2/FC_Filter_IsoDE_wrapper.xml isoem2_isode2/IsoEM.loc isoem2_isode2/IsoEM.loc.sample isoem2_isode2/README.txt isoem2_isode2/isoDE.xml isoem2_isode2/isoDE2.sh isoem2_isode2/isoem_wrapper.sh isoem2_isode2/isoem_wrapper.xml isoem2_isode2/tool_data_table_conf.xml isoem2_isode2/tool_data_table_conf.xml.sample isoem_wrapper.sh isoem_wrapper.xml tool_data_table_conf.xml tool_data_table_conf.xml.sample
diffstat 21 files changed, 945 insertions(+), 839 deletions(-) [+]
line wrap: on
line diff
--- a/FC_Filter_IsoDE_wrapper.sh	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-tempDir=/galaxy-prod/tmp
-
-inputfile=${1}
-inputfile2=${2}
-outputfile=${3}
-inputFC=${4}
-
-
-if [ "${inputfile2}" == "1" ]
-then 
-        echo 
-        awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile
-#exit;
-
-elif [ "${inputfile2}" == "2" ]
-then        
-        awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile
-else  
-        awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile
-fi
-
-echo "done"
-date
-
-
-
-
-
-#logx(y) = logn(y)/logn(x) 
-
-#The NR==1 condition makes sure the file header gets printed
\ No newline at end of file
--- a/FC_Filter_IsoDE_wrapper.xml	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-<tool id="FC_Filter_IsoDE2" name="Fold Change Filter">
-  <description>Filters IsoDE2 output based on fold change</description>
-  <command interpreter="bash">
-     FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min
-  </command>
-  <inputs>  
-
-    <param name="input1" type="data" label="Select data from IsoDE output files" />
-    <param name="input2" type="select" label="A list of genes/isoforms over expressed in:">
-      <option value="1">Condition 1</option>
-      <option value="2">Condition 2</option>
-      <option value="3">Condition 1 or Condition 2</option>
-    </param>
-    <param name="FC_min" type="integer" value="2" label="Minimum Fold change:" />
-  </inputs> 
-
-  <outputs>
-    <data format="tabular" name="out_file" metadata_source="input1" label="Filtered IsoDE"  />
-  </outputs>
-
-
-<help>  
-**What it does**
-
-This filter selects from the IsoDE2 output file, genes that genes that have fold change (ratio between the gene expression in the two conditions in comparison) greater 
-than or equal to the a certain threshold set by the user.
-
-
-**Input**
-
-* 1- IsoDE2 output file. It can be any of the output files generated by an IsoDE2 runs; for genes or isoforms/FPKM or TPM
-* 2- Direction of over expression of interest to the user (genes over expressed in condition1, over expressed in condition 2, or overexpressed in either condition)
-* 3- Minimim fold change of interest
-*
-*
-
-
-
-
-**Output**
-
-
-The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the used.
-
-</help>
-</tool>
-
-
-
-
-
-
-
-
-
-     
--- a/IsoEM.loc	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-#This file has file paths for the GTF, cluster file and tmap and hisat2  indices for IsoEM2 
-
-#NOTE: All entries in this file MUST be tab-delimited
-
-#Every entry has the following 5 fields:
-#Reference name	<GTF>	<TMAP_INDEX> <HISAT2_INDEX> <Cluster file>
-
-
-
-mm10-RefSeq77	/import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-hg38-RefSeq77	/import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-	
\ No newline at end of file
--- a/IsoEM.loc.sample	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-#This file has file paths for the GTF, cluster file and tmap and hisat2  indices for IsoEM2 
-
-#NOTE: All entries in this file MUST be tab-delimited
-
-#Every entry has the following 5 fields:
-#Reference name	<GTF>	<TMAP_INDEX> <HISAT2_INDEX> <Cluster file>
-
-
-
-mm10-RefSeq77	/import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-hg38-RefSeq77	/import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-	
\ No newline at end of file
--- a/isoDE.xml	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="isoDE" name="IsoDE2">
-  <description>Compute gene Differential Expression based on IsoEM2 output </description>
-  <command interpreter="bash">isoDE2.sh
-      -c1
-      $condition1
-      #for $r in $condition1replicates
-        ${r.c1Rep}
-      #end for
-      -c2
-      $condition2
-      #for $r in $condition2replicates
-        ${r.c2Rep}
-      #end for
-      -pval $pval
-      -geneFPKMout $geneFPKM
-	-geneTPMout $geneTPM
-	-isoFPKMout $isoformFPKM  
-	-isoTPMout $isoformTPM
- </command>
-
-  <inputs>
-  <param name="condition1" type="data" label="Select data for Condition 1" format="gz" help="Condition 1 isoEM2 compressed output file"/>
-<!--
-    <param format="toolshed.gz" name="condition1" type="data" label="Select data for Condition 1" help="Condition 1 isoEM2 compressed output file"/>
--->
-    <repeat name="condition1replicates" title="Replicates for Condition 1">
-      <param name="c1Rep" label="Add replicate" type="data" format="gz" data_ref="condtion1" />
-    </repeat>
-
-    <param format="gz" name="condition2" type="data" label="Select data for Condition 2"  help="Condition 2 isoEM2 compressed output file"/>
-<!--
-    <param  format="toolshed.gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 IsoEM2 compressed output file"/>
--->
-    <repeat name="condition2replicates" title="Replicates for Condition 2">
-      <param format="gz" name="c2Rep" label="Add replicate" type="data" data_ref="condtion2" />
-    </repeat>
-
-
-    <param name="pval" label="p-value" type="float" value="0.05" help="Desired p-value to for which a reliable fold change level will be reported" />
-
-  </inputs>
-  <outputs>
-    <data format="tabular" name="geneFPKM" label="isoDE gene fpkm"  />
-    <data format="tabular" name="isoformFPKM" label="isoDE isoform fpkm"  />
-    <data format="tabular" name="geneTPM" label="isoDE gene tpm"  />
-    <data format="tabular" name="isoformTPM" label="isoDE isoform tpm"  />
-  </outputs>
-
-<help>
-**What it does**
-
-Computes gene and isoform differential expression between two conditions (example tumor and normal) for both Fragment per Kilobase of transcript length per Million 
-bases (FPKM) and Transcripts per Million (TPM) values. The computation is based on the boostraping output generated by IsoEM2. The number of bootstrap iterations for 
-IsoEM2 should be >= 20 (suggested 200).
-
-**Input**
-
-* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicated for either condition
-* - Desired p-value to for which a reliable fold change level will be reported
-*
-
-
-
-
-**Output**
-
-* four output files containinag results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have identical format with the following fields
-* 1- Gene/isoform ID
-* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2. 
-*               For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared condition 1 is 
-*               at least 2 ^ absoulte value of this field.The sign indicates the direction, +ve means over expressed in condition 2, -ve means underexpressed in
-*               condition 1. NDE indicates that no change was detected.
-* 3- log_2(condition 1 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping
-* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping
-* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping
-
-
-</help>
-</tool>
--- a/isoDE2.sh	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-#!/bin/bash
-
-isoEMDir=/home/projects/isoem2/isoem-workingversion
-toolpath=${isoEMDir}/bin
-isoDEPath=${isoEMDir}/bin
-supportCalcPath=${isoEMDir}/src/calc
-fpkmGeneCommand=""
-fpkmIsoformCommand=""
-tpmGeneCommand=""  
-tpmIsoformCommand=""
-
-
-arg=($*)
-i=0
-while [ $i -lt $# ]
-do
-        a=${arg[i]}
-	if [ "$a" == "-c1" ]; then
-		fpkmGeneCommand="$fpkmGeneCommand -c1"
-		fpkmIsoformCommand="$fpkmIsoformCommand -c1"
-		tpmGeneCommand="$tpmGeneCommand -c1"
-		tpmIsoformCommand="$tpmIsoformCommand -c1"
-		
-		((i++))
-		 a=${arg[i]}
-		rep=1
-		while [[  `expr index "$a" "/"` -ne 0  && $i -lt $# ]]
-		do
-			condition1File=$a
-			${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep}
-
-			fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G"
-			fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I"
-			tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G"
-			tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I"
-			((rep++))
-
-			((i++))
-			 a=${arg[i]}
-		done
-        elif [ "$a" == "-c2" ]; then 
-		fpkmGeneCommand="$fpkmGeneCommand -c2"
-		fpkmIsoformCommand="$fpkmIsoformCommand -c2"
-		tpmGeneCommand="$tpmGeneCommand -c2"
-		tpmIsoformCommand="$tpmIsoformCommand -c2"
-
-		((i++))
-		 a=${arg[i]}
-		rep=1
-		while [[  `expr index "$a" "/"` -ne 0 &&  $i -lt $# ]]
-                do
-			condition1File=$a
-			#echo $condition1File
-			${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep}
-
-			fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G"
-			fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I"
-			tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G"
-			tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I"
-			((rep++))
-
-                     ((i++))
-                     a=${arg[i]}
-                done
-	elif [ "$a" == "-pval" ]; then 
-		((i++))
-		pval=${arg[i]}
-		((i++))
-
-
-	elif [ "$a" == "-geneFPKMout" ]; then 
-		((i++))
-		geneFPKMout_file=${arg[i]}
-		((i++))
-
-
-	elif [ "$a" == "-geneTPMout" ]; then 
-		((i++))
-		geneTPMout_file=${arg[i]}
-		((i++))
-
-	elif [ "$a" == "-isoFPKMout" ]; then 
-		((i++))
-		isoFPKMout_file=${arg[i]}
-		((i++))
-
-	elif [ "$a" == "-isoTPMout" ]; then 
-		((i++))
-		isoTPMout_file=${arg[i]}
-		((i++))
-	else
-
-		((i++))
-	fi
-
-done
-
-support=`java -cp ${supportCalcPath} support 200 200 $pval`
-fpkmGeneCommand="$fpkmGeneCommand -b $support"
-fpkmIsoformCommand="$fpkmIsoformCommand -b $support"
-tpmGeneCommand="$tpmGeneCommand -b $support"
-tpmIsoformCommand="$tpmIsoformCommand -b $support"
-
-fpkmGeneCommand="$fpkmGeneCommand -dfc 2"
-fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2"
-tpmGeneCommand="$tpmGeneCommand -dfc 2"
-tpmIsoformCommand="$tpmIsoformCommand -dfc 2"
- 
-
-#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then
-#	pth=`pwd`
-#	out_prefix=${pth}/${out_prefix}
-#fi
- 
-
-echo GENE FPKM
-echo moving start
-date
-
-mkdir fpkm_G
-cd fpkm_G
-mv ../c*_fpkm_G .
-
-
-echo isoDE start
-date
-
-
-${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt"
-#awk '{for (f=1; f<=NF; f++) {if (f == NF)  printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file}
-
-echo awk command
-date
-
-
-awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file}
-
-cd ..
-
-
-echo ISOFORM FPKM
-
-echo moving start
-date
- 
-mkdir fpkm_I
-cd fpkm_I
-mv ../c*_fpkm_I .
-
-
-echo isoDE start
-date
-
-${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt"
-
-echo awk command
-date
-pwd
-awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt  > ${isoFPKMout_file}
-cd ..
-
-
-echo ISOFORM TPM
-echo moving start
-date
-
-mkdir tpm_G
-cd tpm_G
-mv ../c*_tpm_G .
-
-echo isoDE start
-date
-
-${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt"
-
-echo awk command
-date
-pwd
-awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}'  Bootstrap_Merge1_DIR/output.txt  > ${geneTPMout_file}
-cd ..
-
-
-echo ISOFORM TPM
-echo moving start
-date
-
-mkdir tpm_I
-cd tpm_I
-mv ../c*_tpm_I .
-
-echo isoDE start
-date
-
-${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt"
-
-echo awk command
-date
-
-awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}'  Bootstrap_Merge1_DIR/output.txt  > ${isoTPMout_file}
-
-cd ..
-
- 
-echo final cleanup
-date
-rm -fr fpkm_G fpkm_I tpm_G cd tpm_I
-echo done
-date
-
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.sh	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+tempDir=/galaxy-prod/tmp
+
+inputfile=${1}
+inputfile2=${2}
+outputfile=${3}
+inputFC=${4}
+
+
+if [ "${inputfile2}" == "1" ]
+then 
+        echo 
+        awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile
+#exit;
+
+elif [ "${inputfile2}" == "2" ]
+then        
+        awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile
+else  
+        awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile
+fi
+
+echo "done"
+date
+
+
+
+
+
+#logx(y) = logn(y)/logn(x) 
+
+#The NR==1 condition makes sure the file header gets printed
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.xml	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,58 @@
+<tool id="FC_Filter_IsoDE2" name="Fold Change Filter">
+  <description>Filters IsoDE2 output based on fold change</description>
+  <command interpreter="bash">
+     FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min
+  </command>
+  <inputs>  
+
+    <param name="input1" type="data" label="Select data from IsoDE output files" />
+    <param name="input2" type="select" label="A list of genes/isoforms over expressed in:">
+      <option value="1">Condition 1</option>
+      <option value="2">Condition 2</option>
+      <option value="3">Condition 1 or Condition 2</option>
+    </param>
+    <param name="FC_min" type="integer" value="2" label="Minimum Fold change:" />
+  </inputs> 
+
+  <outputs>
+    <data format="tabular" name="out_file" metadata_source="input1"  />
+<!-- label="Filtered IsoDE"  />  -->
+
+  </outputs>
+
+
+<help>  
+**What it does**
+
+This filter selects, from the IsoDE2 output file, the genes that have a fold change (ratio between the gene expression in the two conditions being compared) greater 
+than or equal to a threshold set by the user.
+
+
+**Input**
+
+* 1- IsoDE2 output file. It can be any of the output files generated by an IsoDE2 runs; for genes or isoforms/FPKM or TPM
+* 2- Direction of over expression of interest to the user (genes over expressed in condition1, over expressed in condition 2, or overexpressed in either condition)
+* 3- Minimum fold change of interest
+*
+*
+
+
+
+
+**Output**
+
+
+The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the user.
+
+</help>
+</tool>
+
+
+
+
+
+
+
+
+
+     
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/IsoEM.loc	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,12 @@
+#This file has file paths for the GTF, cluster file and tmap and hisat2  indices for IsoEM2 
+
+#NOTE: All entries in this file MUST be tab-delimited
+
+#Every entry has the following 5 fields:
+#Reference name	<GTF>	<TMAP_INDEX>	<HISAT2_INDEX>	<Cluster file>
+
+
+
+mm10-RefSeq77	/import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+hg38-RefSeq77	/import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+	
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/IsoEM.loc.sample	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,12 @@
+#This file has file paths for the GTF, cluster file and tmap and hisat2  indices for IsoEM2 
+
+#NOTE: All entries in this file MUST be tab-delimited
+
+#Every entry has the following 5 fields:
+#Reference name	<GTF>	<TMAP_INDEX>	<HISAT2_INDEX>	<Cluster file>
+
+
+
+mm10-RefSeq77	/import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+hg38-RefSeq77	/import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf	/import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta	/import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+	
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/README.txt	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,11 @@
+To install IsoEM2, IsoDE2 and the FC filter
+
+1) Follow Galaxy instructions for installing a tool from the Tool Shed
+2) Download and install  IsoEM2/IsoDE2 (https://github.com/mandricigor/isoem2)
+3) Install other dependencies:
+- tmap (needed for ION Torrent data)
+- hisat2 (needed for Illumina data)
+- prinseq
+- bedtools
+4) Edit isoem_wrapper.sh and isoDE2.sh. Change the tool paths to where they are installed on your system 
+ 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoDE.xml	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,79 @@
+<tool id="isoDE" name="IsoDE2">
+  <description>Compute gene Differential Expression based on IsoEM2 output </description>
+  <command interpreter="bash">isoDE2.sh
+      -c1
+      $condition1
+      #for $r in $condition1replicates
+        ${r.c1Rep}
+      #end for
+      -c2
+      $condition2
+      #for $r in $condition2replicates
+        ${r.c2Rep}
+      #end for
+      -pval $pval
+      -geneFPKMout $geneFPKM
+	-geneTPMout $geneTPM
+	-isoFPKMout $isoformFPKM  
+	-isoTPMout $isoformTPM
+ </command>
+
+  <inputs>
+  <param name="condition1" type="data" label="Select data for Condition 1" format="gz" help="Condition 1 isoEM2 compressed output file"/>
+<!--
+    <param format="toolshed.gz" name="condition1" type="data" label="Select data for Condition 1" help="Condition 1 isoEM2 compressed output file"/>
+-->
+    <repeat name="condition1replicates" title="Replicates for Condition 1">
+      <param name="c1Rep" label="Add replicate" type="data" format="gz" data_ref="condtion1" />
+    </repeat>
+
+    <param format="gz" name="condition2" type="data" label="Select data for Condition 2"  help="Condition 2 isoEM2 compressed output file"/>
+<!--
+    <param  format="toolshed.gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 IsoEM2 compressed output file"/>
+-->
+    <repeat name="condition2replicates" title="Replicates for Condition 2">
+      <param format="gz" name="c2Rep" label="Add replicate" type="data" data_ref="condtion2" />
+    </repeat>
+
+
+    <param name="pval" label="p-value" type="float" value="0.05" help="Desired p-value to for which a reliable fold change level will be reported" />
+
+  </inputs>
+  <outputs>
+    <data format="tabular" name="geneFPKM" label="isoDE gene fpkm"  />
+    <data format="tabular" name="isoformFPKM" label="isoDE isoform fpkm"  />
+    <data format="tabular" name="geneTPM" label="isoDE gene tpm"  />
+    <data format="tabular" name="isoformTPM" label="isoDE isoform tpm"  />
+  </outputs>
+
+<help>
+**What it does**
+
+Computes gene and isoform differential expression between two conditions (for example, tumor and normal) for both Fragments Per Kilobase of transcript per Million 
+mapped reads (FPKM) and Transcripts Per Million (TPM) values. The computation is based on the bootstrapping output generated by IsoEM2. The number of bootstrap iterations for 
+IsoEM2 should be >= 20 (suggested 200).
+
+**Input**
+
+* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicates for either condition
+* - Desired p-value for which a reliable fold change level will be reported
+*
+
+
+
+
+**Output**
+
+* Four output files containing results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have an identical format with the following fields
+* 1- Gene/isoform ID
+* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2. 
+*               For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared to condition 1 is 
+*               at least 2 ^ (absolute value of this field). The sign indicates the direction: +ve means over expressed in condition 2, -ve means over expressed in
+*               condition 1. 0 indicates that no change was detected.
+* 3- log_2(condition 2 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping
+* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping
+* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping
+
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoDE2.sh	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+isoEMDir=/galaxy/galaxy/tools-dependencies/bin/isoem2/
+toolpath=${isoEMDir}/bin
+isoDEPath=${isoEMDir}/bin
+supportCalcPath=${isoEMDir}/src/calc
+fpkmGeneCommand=""
+fpkmIsoformCommand=""
+tpmGeneCommand=""  
+tpmIsoformCommand=""
+
+numberOfBootstrapIterationsPerSample=199
+
+arg=($*)
+i=0
+while [ $i -lt $# ]
+do
+        a=${arg[i]}
+	if [ "$a" == "-c1" ]; then
+		fpkmGeneCommand="$fpkmGeneCommand -c1"
+		fpkmIsoformCommand="$fpkmIsoformCommand -c1"
+		tpmGeneCommand="$tpmGeneCommand -c1"
+		tpmIsoformCommand="$tpmIsoformCommand -c1"
+		
+		((i++))
+		 a=${arg[i]}
+		rep=1
+		while [[  `expr index "$a" "/"` -ne 0  && $i -lt $# ]]
+		do
+			condition1File=$a
+			${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep}
+
+			fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G"
+			fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I"
+			tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G"
+			tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I"
+			((rep++))
+
+			((i++))
+			 a=${arg[i]}
+		done
+        elif [ "$a" == "-c2" ]; then 
+		fpkmGeneCommand="$fpkmGeneCommand -c2"
+		fpkmIsoformCommand="$fpkmIsoformCommand -c2"
+		tpmGeneCommand="$tpmGeneCommand -c2"
+		tpmIsoformCommand="$tpmIsoformCommand -c2"
+
+		((i++))
+		 a=${arg[i]}
+		rep=1
+		while [[  `expr index "$a" "/"` -ne 0 &&  $i -lt $# ]]
+                do
+			condition1File=$a
+			#echo $condition1File
+			${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep}
+
+			fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G"
+			fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I"
+			tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G"
+			tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I"
+			((rep++))
+
+                     ((i++))
+                     a=${arg[i]}
+                done
+	elif [ "$a" == "-pval" ]; then 
+		((i++))
+		pval=${arg[i]}
+		((i++))
+
+
+	elif [ "$a" == "-geneFPKMout" ]; then 
+		((i++))
+		geneFPKMout_file=${arg[i]}
+		((i++))
+
+
+	elif [ "$a" == "-geneTPMout" ]; then 
+		((i++))
+		geneTPMout_file=${arg[i]}
+		((i++))
+
+	elif [ "$a" == "-isoFPKMout" ]; then 
+		((i++))
+		isoFPKMout_file=${arg[i]}
+		((i++))
+
+	elif [ "$a" == "-isoTPMout" ]; then 
+		((i++))
+		isoTPMout_file=${arg[i]}
+		((i++))
+	else
+
+		((i++))
+	fi
+
+done
+((rep--))	# the argument loop leaves rep at (file count of the last condition) + 1
+bootstrap=$((rep * numberOfBootstrapIterationsPerSample))
+echo boostrap iterations $bootstrap
+# The support calculator assumes all-pairs matching and multiplies its first two
+# arguments to get the number of ratios. The second argument is 1 so that this
+# number equals the number of bootstrap samples (earlier: "200 200", "$bootstrap $bootstrap").
+echo calculate support based on p-value and number or replicates
+support=$(java -cp "${supportCalcPath}" support "$bootstrap" 1 "$pval")
+# Stop here rather than hand isodecalls an empty -b value.
+[ -n "$support" ] || { echo "support calculation failed" >&2; exit 1; }
+fpkmGeneCommand="$fpkmGeneCommand -b $support"
+fpkmIsoformCommand="$fpkmIsoformCommand -b $support"
+tpmGeneCommand="$tpmGeneCommand -b $support"
+tpmIsoformCommand="$tpmIsoformCommand -b $support"
+fpkmGeneCommand="$fpkmGeneCommand -dfc 2"
+fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2"
+tpmGeneCommand="$tpmGeneCommand -dfc 2"
+tpmIsoformCommand="$tpmIsoformCommand -dfc 2"
+ 
+
+#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then
+#	pth=`pwd`
+#	out_prefix=${pth}/${out_prefix}
+#fi
+ 
+
+# Gene-level FPKM: collect the converted c*_fpkm_G inputs in their own
+# directory, run isodecalls there and reformat its output table.
+echo GENE FPKM
+echo moving start
+date
+
+mkdir fpkm_G
+# Abort if the directory is unusable: the later "cd .." and the final
+# "rm -fr" would otherwise act relative to the wrong directory.
+cd fpkm_G || exit 1
+mv ../c*_fpkm_G .
+
+echo isoDE start
+date
+
+# $fpkmGeneCommand stays unquoted on purpose: it is a space-separated option list.
+"${isoDEPath}/isodecalls" $fpkmGeneCommand -out "output.txt"
+
+echo awk command
+date
+
+awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > "${geneFPKMout_file}"
+cd ..
+
+
+# Isoform-level FPKM: gather the c*_fpkm_I inputs, run isodecalls, reformat the table.
+echo ISOFORM FPKM
+echo moving start
+date
+
+mkdir fpkm_I
+# Abort on failure so the later "cd .." and "rm -fr" cannot run in the wrong directory.
+cd fpkm_I || exit 1
+mv ../c*_fpkm_I .
+
+echo isoDE start
+date
+
+# $fpkmIsoformCommand stays unquoted on purpose: it is a space-separated option list.
+"${isoDEPath}/isodecalls" $fpkmIsoformCommand -out "output.txt"
+echo awk command
+date
+pwd
+awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt  > "${isoFPKMout_file}"
+cd ..
+
+
+# Gene-level TPM. The banner previously read ISOFORM TPM although this section processes the tpm_G gene files.
+echo GENE TPM
+echo moving start
+date
+
+mkdir tpm_G
+cd tpm_G || exit 1	# never continue (and later "cd ..") from the wrong directory
+mv ../c*_tpm_G .
+
+echo isoDE start
+date
+
+"${isoDEPath}/isodecalls" $tpmGeneCommand -out "output.txt"
+echo awk command
+date
+pwd
+awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}'  Bootstrap_Merge1_DIR/output.txt  > "${geneTPMout_file}"
+cd ..
+
+
+# Isoform-level TPM: gather the c*_tpm_I inputs, run isodecalls, reformat the table.
+echo ISOFORM TPM
+echo moving start
+date
+
+mkdir tpm_I
+cd tpm_I || exit 1	# never continue (and later "cd ..") from the wrong directory
+mv ../c*_tpm_I .
+
+echo isoDE start
+date
+
+"${isoDEPath}/isodecalls" $tpmIsoformCommand -out "output.txt"
+
+echo awk command
+date
+
+awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t"  $5 "\t" $6 "\t" $7}}'  Bootstrap_Merge1_DIR/output.txt  > "${isoTPMout_file}"
+cd ..
+
+ 
+echo final cleanup
+date
+rm -fr -- fpkm_G fpkm_I tpm_G tpm_I	# the stray "cd" operand is gone: it named a path called "cd" for deletion
+echo done
+date
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoem_wrapper.sh	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,367 @@
+#!/bin/bash
+
+
+echo $@
+echo pwd
+pwd
+toolpath=/galaxy-prod/galaxy/tools-dependencies
+hisat2Path=${toolpath}/bin
+tmapPath=${toolpath}/bin
+prinseqPath=${toolpath}/bin
+bedtoolsPath=${toolpath}/bin
+	
+
+#tmapPath=/usr/local/bin
+#bedtoolsPath=/usr/bin
+#hisat2Path=/usr/local/bin
+
+tempDir=/galaxy-prod/tmp
+
+
+isoem2Path=${toolpath}/bin/isoem2/bin
+
+#exit;
+
+arg=("$@")   # copy the arguments verbatim: one array element per argument, no re-splitting or globbing
+i=0
+for a in "${arg[@]}"
+do
+((i++))   # after the increment, arg[i] is the word that follows $a, i.e. the value of flag $a
+        if [ "$a" == "--input1" ]; then
+                RNAseq_1=${arg[i]}
+        fi
+
+        if [ "$a" == "--input2" ]; then
+                RNAseq_2=${arg[i]}
+        fi
+
+        if [ "$a" == "--GTF" ]; then
+                GTF_file=${arg[i]}
+        fi
+
+        if [ "$a" == "--TMAP_INDEX" ]; then
+                TMAP_INDEX_file=${arg[i]}
+        fi
+
+        if [ "$a" == "--HISAT2_INDEX" ]; then
+                HISAT2_INDEX_file=${arg[i]}
+        fi
+
+        if [ "$a" == "--Cluster" ]; then
+                Cluster_file=${arg[i]}
+        fi
+
+        if [ "$a" == "-m" ]; then
+                M=${arg[i]}
+        fi
+
+        if [ "$a" == "-d" ]; then
+                D=${arg[i]}
+        fi
+
+        if [ "$a" == "--out_gene_fpkm" ]; then
+                out_gene_fpkm=${arg[i]}
+        fi
+
+        if [ "$a" == "--out_gene_tpm" ]; then
+                out_gene_tpm=${arg[i]}
+        fi
+
+        if [ "$a" == "--out_iso_fpkm" ]; then
+                out_iso_fpkm=${arg[i]}
+        fi
+
+        if [ "$a" == "--out_iso_tpm" ]; then
+                out_iso_tpm=${arg[i]}
+        fi
+
+        if [ "$a" == "--out_bootstrap" ]; then
+                out_bootstrap=${arg[i]}
+        fi
+
+        if [ "$a" == "--RNA_type" ]; then
+                RNAseqType=${arg[i]}
+        fi
+
+        if [ "$a" == "--fastaFile" ]; then
+                FastaFile=${arg[i]}   # stored, but not referenced again later in this script
+        fi
+
+        if [ "$a" == "--MinReadLength" ]; then
+                MinReadLengthNum=${arg[i]}
+        fi
+
+done
+
+
+
+
+if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
+then 
+        echo ${TMAP_INDEX_file}
+        echo Align the RNAseq_sample fastq to transcriptome using TMAP
+
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+#        fi
+ 
+        file_type=`echo $f | tail -c 6`
+	echo file type $file_type
+
+        if [ "$file_type" == "fastq" ]; then
+
+            #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
+	    cp ${RNAseq_1} RNAseq_1.fastq
+        fi
+
+        file_type=`echo $f | tail -c 4`
+
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+
+
+#           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+        fi
+
+	   echo @@@@@@ Number of raw reads in  Proton RNA tumor @@@@@@@
+	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+
+	   echo filter the RNA fastq QC less than 20 and duplicates 
+	   perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_len ${MinReadLengthNum} -min_qual_mean 20  
+ 
+#	   rm RNA_1.fastq
+
+	   echo @@@@@@ Number of cleaned reads in  Proton RNA tumor @@@@@@@
+	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
+
+        ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam
+
+
+elif [ "${RNAseqType}" == "Ion-Torrent-Proton-mm9" ]
+then 
+        echo ${TMAP_INDEX_file}
+        echo Align the RNAseq_sample fastq to transcriptome using TMAP
+
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+#        fi
+ 
+        file_type=`echo $f | tail -c 6`
+	echo file type $file_type
+
+        if [ "$file_type" == "fastq" ]; then
+
+            #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
+	    cp ${RNAseq_1} RNAseq_1.fastq
+        fi
+
+        file_type=`echo $f | tail -c 4`
+
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+
+
+#           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+        fi
+
+	   echo @@@@@@ Number of raw reads in  Proton RNA tumor @@@@@@@
+	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+
+	   echo filter the RNA fastq QC less than 20 and duplicates 
+	   perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum}
+ 
+#	   rm RNA_1.fastq
+
+	   echo @@@@@@ Number of cleaned reads in  Proton RNA tumor @@@@@@@
+	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
+
+        ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam
+
+
+elif [ "${RNAseqType}" == "Illumina-paired-end" ]
+then        
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            gunzip -c ${RNAseq_2} > RNAseq_2.fastq
+#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+#        fi
+
+        file_type=`echo $f | tail -c 6`
+
+        if [ "$file_type" == "fastq" ]; then
+
+
+#            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  ${RNAseq_1} -2  ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+	     cp ${RNAseq_1}  RNAseq_1.fastq
+	     cp ${RNAseq_2}  RNAseq_2.fastq
+
+
+        fi
+
+        file_type=`echo $f | tail -c 4`
+
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
+#           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+
+
+	echo @@@@@@ Number of raw reads not paired in  Ilumina RNA tumor @@@@@@@
+	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq 
+
+
+	echo filter the RNA fastq QC less than 20 and duplicates 
+	perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum} 
+
+	echo @@@@@@ Number of cleaned reads not paired in  Ilumina RNA tumor @@@@@@@
+	python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq
+
+        ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNA_QC_dup_1.fastq -2  RNA_QC_dup_2.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8  > RNAseq_transcriptome.sam
+        
+else  
+        f=$(basename ${RNAseq_1})
+#        file_type=`echo $f | tail -c 9`
+
+#        if [ "$file_type" == "fastq.gz" ]; then 
+
+#            echo "Unzip fastq files"
+
+#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8  > RNAseq_transcriptome.sam
+#        fi
+        # tail -c 6 yields the last 5 characters plus echo's newline, which the command substitution strips.
+        file_type=`echo $f | tail -c 6`
+
+        if [ "$file_type" == "fastq" ]; then
+	    cp ${RNAseq_1} RNAseq_1.fastq
+
+            #${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+        file_type=$(echo "$f" | tail -c 4)   # re-read the 3-character suffix; the 5-character value above can never equal "bam"
+        if [ "$file_type" == "bam" ]; then 
+
+           echo "Convert BAM to fastq"
+
+           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+#           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
+        fi
+        # From here on the input is expected as RNAseq_1.fastq in the working directory (copied or converted above).
+	   echo @@@@@@ Number of raw reads in  Proton RNA tumor @@@@@@@
+	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+
+	   echo filter the RNA fastq QC less than 20 and duplicates 
+	   perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20  -min_len ${MinReadLengthNum}
+ 
+#	   rm RNA_1.fastq
+
+	   echo @@@@@@ Number of cleaned reads in  Proton RNA tumor @@@@@@@
+	   python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
+           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNA_QC_dup.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8  > RNAseq_transcriptome.sam
+
+fi
+
+
+echo Sorting
+
+LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
+
+
+if [ "${RNAseqType}" == "Illumina-paired-end" ]
+then 
+        echo IsoEM for RNAseq mapped to transcriptome
+        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
+
+else
+        echo IsoEM for RNAseq mapped to transcriptome
+        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam 
+fi
+
+echo Join estimates files with ci files
+
+echo ls
+#ls  ./aligned_reads_sorted/ -ltr
+
+# Join each estimates file with its confidence-interval file on the first field and keep the first four fields, tab-separated (no scratch file).
+join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_fpkm
+join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
+join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
+join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
+
+
+#echo Adding output directory to bootstap archive
+#
+#echo ls
+#ls  ./aligned_reads_sorted/ -ltr
+#
+#cd aligned_reads_sorted
+#echo ls
+#ls -ltrh
+#gunzip bootstrap.tar.gz
+#tar rf bootstrap.tar output
+#gzip bootstrap.tar
+mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
+
+
+#echo ls after gz
+#ls -ltr
+#
+#cd ..
+#pwd
+
+
+#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
+#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
+#gzip ./aligned_reads_sorted/bootstrap.tar 
+
+echo ls after gz
+ls -ltr
+
+#4. Copy output files
+#############################################################
+mv gene_fpkm ${out_gene_fpkm}
+mv gene_tpm ${out_gene_tpm} 
+mv iso_fpkm ${out_iso_fpkm}
+mv iso_tpm ${out_iso_tpm}
+
+#5.Remove files
+#############################################################
+rm RNAseq_transcriptome.sam
+rm aligned_reads_sorted.sam
+rm -rf aligned_reads_sorted
+
+echo "done"
+date
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoem_wrapper.xml	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,135 @@
+<tool id="isoem" name="IsoEM2" version="1.0.0">
+    <description> Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data</description>
+    <requirements>
+        
+    </requirements>
+    <command interpreter="bash">
+        isoem_wrapper.sh
+
+            ## Provide outputs.
+            --out_gene_fpkm $out_gene_fpkm
+            --out_gene_tpm $out_gene_tpm
+            --out_iso_fpkm $out_iso_fpkm
+            --out_iso_tpm $out_iso_tpm
+            --out_bootstrap $out_bootstrap
+	
+	    --MinReadLength $MinReadLength
+
+            ## Handle reference file .
+            #if $referenceSource.CCDSsource == "history":
+                --fastaFile $referenceSource.fastaFile
+            #else:
+                --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster
+            #end if
+	   
+            ## First input file always required fastq1.
+            --input1 $Data.input1
+
+            ## Set params based on whether reads are single-end or paired.
+            #if $Data.RNAseqType == "Illumina-paired-end":
+  		--input2 $Data.input2
+	    #else:
+                -m $Data.lengthMean
+	        -d $Data.lengthSd
+	    #end if
+
+            ## RNA-Seq type based on sequencing platform.
+            --RNA_type $Data.RNAseqType &gt; $Run 2&gt;&amp;1
+
+        
+                          
+    </command>
+    <inputs>
+        <conditional name="referenceSource">
+          <param name="CCDSsource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in reference</option>
+            <option value="history">Use reference from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference dataset" help="If your reference of interest is not listed, contact the Galaxy team">
+              <options from_data_table="IsoEM" />
+            </param>
+          </when>
+          <when value="history">
+            <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select CCDS fasta file from your history" />
+          </when>  <!-- history -->
+        </conditional>  <!-- referenceSource -->
+        <conditional name="Data">
+<!--
+            <param name="sPaired" type="select" label="Is this library Single-end or Paired-end?">
+              <option value="single">Single-end</option>
+              <option value="paired">Paired-end</option>
+            </param>
+-->
+            <param name="RNAseqType" type="select" label="Select RNA-seq type">
+	        <option value="Ion-Torrent-Proton">Ion Torrent single-end</option>
+	       	<option value="Illumina-paired-end">Illumina paired-end</option>
+	        <option value="Illumina-single-end">Illumina single-end</option>
+            </param>  <!-- RNAseqType -->
+            <when value="Illumina-paired-end">
+              <param name="input1" type="data" label="RNA-Seq file1, fastq or bam format" />
+              <param name="input2" type="data" label="RNA-Seq file2, fastq or bam format" /> 
+	    </when>
+            <when value="Ion-Torrent-Proton">
+	      <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
+              <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
+	      <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> 
+            </when>
+	    <when value="Illumina-single-end">
+	      <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
+              <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
+	      <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> 
+            </when>
+        </conditional>  <!-- Data -->
+
+	  <param name="MinReadLength" label="Min. read length" type="text" value="50" />
+
+
+<!--
+        <param name="RNAseqType" type="select" label="Select RNA-seq type">
+           <option value="Ion-Torrent-Proton">Ion Torrent Proton</option>
+           <option value="Illumina-paired-end">Illumina paired-end</option>
+           <option value="Illumina-single-end">Illumina single-end</option>
+        </param>  
+-->
+    </inputs>
+    <outputs>
+        <data name="out_gene_fpkm" format="tabular" label="Gene_fpkm"/>
+    	<data name="out_gene_tpm" format="tabular" label="Gene_tpm"/>
+    	<data name="out_iso_fpkm" format="tabular" label="Iso_fpkm"/>
+    	<data name="out_iso_tpm" format="tabular" label="Iso_tpm"/>
+	<data name="out_bootstrap" format="toolshed.gz" label="Bootstrap.tar.gz"/>
+        <data name="Run" format="log"  label="isoem_wrapper: The log file" />
+    </outputs>
+<help>
+**What it does**
+
+* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data. 
+
+**Input Format**
+
+* The tool accepts the fastq, fastq.gz, and bam formats. The extension must be specified at the end of the file name.
+* RNA-seq data must be Ion Torrent Proton or Illumina sequencing data.
+
+-----
+
+
+**Output Format**
+
+* Four output files containing results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have an identical format with the following fields.
+
+
+* 1 Gene/Isoform ID 
+* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads) 
+* 3 Min FPKM/TPM
+* 4 Max FPKM/TPM
+
+* And one compressed **Bootstrap.tar** file will be used in IsoDE2 to compute gene differential expression.
+</help>
+
+
+</tool>
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/tool_data_table_conf.xml	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,10 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
+    <table name="IsoEM" comment_char="#">
+        <columns>value, GTF, CCDS_INDEX, Cluster </columns>
+        <file path="tool-data/IsoEM.loc" />
+    </table>
+
+</tables>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/tool_data_table_conf.xml.sample	Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,10 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
+    <table name="IsoEM" comment_char="#">
+        <columns>value, GTF, CCDS_INDEX, Cluster </columns>
+        <file path="tool-data/IsoEM.loc" />
+    </table>
+
+</tables>
+
--- a/isoem_wrapper.sh	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,256 +0,0 @@
-#!/bin/bash
-
-
-echo $@
-echo pwd
-pwd
-isoEMDir=/home/projects/isoem2/isoem-workingversion
-tmapPath=/usr/local/bin
-bedtoolsPath=/usr/local/bin
-hisat2Path=/usr/local/bin
-tempDir=/tmp
-
-
-isoem2Path=${isoEMDir}/bin
-
-#exit;
-
-arg=($*)
-i=0
-for a in ${arg[*]}
-do
-((i++))
-	if [ "$a" == "--input1" ]; then 
-		RNAseq_1=${arg[i]}
-	fi
-
-        if [ "$a" == "--input2" ]; then 
-		RNAseq_2=${arg[i]}
-	fi
-		
-	if [ "$a" == "--GTF" ]; then 
-		GTF_file=${arg[i]}
-	fi
-
-        if [ "$a" == "--TMAP_INDEX" ]; then 
-		TMAP_INDEX_file=${arg[i]}
-	fi
-
-        if [ "$a" == "--HISAT2_INDEX" ]; then 
-		HISAT2_INDEX_file=${arg[i]}
-        fi
-
-	if [ "$a" == "--Cluster" ]; then 
-		Cluster_file=${arg[i]}
-	fi
-	
-	if [ "$a" == "-m" ]; then 
-		M=${arg[i]}
-	fi
-	
-	if [ "$a" == "-d" ]; then 
-		D=${arg[i]}
-	fi
-
-	if [ "$a" == "--out_gene_fpkm" ]; then 
-		out_gene_fpkm=${arg[i]}
-	fi
-
-        if [ "$a" == "--out_gene_tpm" ]; then 
-		out_gene_tpm=${arg[i]}
-	fi
-
-        if [ "$a" == "--out_iso_fpkm" ]; then 
-		out_iso_fpkm=${arg[i]}
- 	fi
-
-        if [ "$a" == "--out_iso_tpm" ]; then 
-		out_iso_tpm=${arg[i]}
-	fi
-
-        if [ "$a" == "--out_bootstrap" ]; then 
-		out_bootstrap=${arg[i]}
-	fi
-
-        if [ "$a" == "--RNA_type" ]; then 
-		RNAseqType=${arg[i]}
-	fi
-
-        if [ "$a" == "--fastaFile" ]; then 
-		FastaFile=${arg[i]}
-	fi
-done
-
-
-
-if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
-then 
-        echo ${TMAP_INDEX_file}
-        echo Align the RNAseq_sample fastq to transcriptome using TMAP
-
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
-
-#            echo "Unzip fastq files"
-
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
-#        fi
- 
-        file_type=`echo $f | tail -c 6`
-
-        if [ "$file_type" == "fastq" ]; then
-
-            ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
-        fi
-
-        file_type=`echo $f | tail -c 4`
-
-        if [ "$file_type" == "bam" ]; then 
-
-           echo "Convert BAM to fastq"
-
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-           ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
-        fi
-
-
-elif [ "${RNAseqType}" == "Illumina-paired-end" ]
-then        
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
-
-#            echo "Unzip fastq files"
-
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            gunzip -c ${RNAseq_2} > RNAseq_2.fastq
-#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-#        fi
-
-        file_type=`echo $f | tail -c 6`
-
-        if [ "$file_type" == "fastq" ]; then
-
-            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  ${RNAseq_1} -2  ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-        fi
-
-        file_type=`echo $f | tail -c 4`
-
-        if [ "$file_type" == "bam" ]; then 
-
-           echo "Convert BAM to fastq"
-
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
-           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1  RNAseq_1.fastq -2  RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-        fi
-
-        
-else  
-        f=$(basename ${RNAseq_1})
-#        file_type=`echo $f | tail -c 9`
-
-#        if [ "$file_type" == "fastq.gz" ]; then 
-
-#            echo "Unzip fastq files"
-
-#            gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-#            /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-#        fi
-
-        file_type=`echo $f | tail -c 6`
-
-        if [ "$file_type" == "fastq" ]; then
-
-            ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-        fi
-
-        if [ "$file_type" == "bam" ]; then 
-
-           echo "Convert BAM to fastq"
-
-           ${bedtoolsPath}/bedtools  bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
-           ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U  RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8  > RNAseq_transcriptome.sam
-        fi
-
-fi
-
-
-echo Sorting
-
-LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
-
-
-if [ "${RNAseqType}" == "Illumina-paired-end" ]
-then 
-        echo IsoEM for RNAseq mapped to transcriptome
-        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
-
-else
-        echo IsoEM for RNAseq mapped to transcriptome
-        ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam 
-fi
-
-echo Join estimates files with ci files
-
-echo ls
-#ls  ./aligned_reads_sorted/ -ltr
-
-join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 
-awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
-join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
-join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
-join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
-
-
-#echo Adding output directory to bootstap archive
-#
-#echo ls
-#ls  ./aligned_reads_sorted/ -ltr
-#
-#cd aligned_reads_sorted
-#echo ls
-#ls -ltrh
-#gunzip bootstrap.tar.gz
-#tar rf bootstrap.tar output
-#gzip bootstrap.tar
-mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
-
-
-#echo ls after gz
-#ls -ltr
-#
-#cd ..
-#pwd
-
-
-#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
-#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
-#gzip ./aligned_reads_sorted/bootstrap.tar 
-
-echo ls after gz
-ls -ltr
-
-#4. Copy output files
-#############################################################
-mv gene_fpkm ${out_gene_fpkm}
-mv gene_tpm ${out_gene_tpm} 
-mv iso_fpkm ${out_iso_fpkm}
-mv iso_tpm ${out_iso_tpm}
-
-#5.Remove files
-#############################################################
-rm RNAseq_transcriptome.sam
-rm aligned_reads_sorted.sam
-rm -rf aligned_reads_sorted
-
-echo "done"
-date
-
-
-
-
--- a/isoem_wrapper.xml	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-<tool id="isoem" name="IsoEM2" version="1.0.0">
-    <description> Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data</description>
-    <requirements>
-        
-    </requirements>
-    <command interpreter="bash">
-        isoem_wrapper.sh
-
-            ## Provide outputs.
-            --out_gene_fpkm $out_gene_fpkm
-            --out_gene_tpm $out_gene_tpm
-            --out_iso_fpkm $out_iso_fpkm
-            --out_iso_tpm $out_iso_tpm
-            --out_bootstrap $out_bootstrap
-
-            ## Handle reference file .
-            #if $referenceSource.CCDSsource == "history":
-                --fastaFile $referenceSource.fastaFile
-            #else:
-                --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster
-            #end if
-	   
-            ## First input file always required fastq1.
-            --input1 $Data.input1
-
-            ## Set params based on whether reads are single-end or paired.
-            #if $Data.RNAseqType == "Illumina-paired-end":
-  		--input2 $Data.input2
-	    #else:
-                -m $Data.lengthMean
-	        -d $Data.lengthSd
-	    #end if
-
-            ## RNA-Seq type based on sequencing platform.
-            --RNA_type $Data.RNAseqType &gt; $Run 2&gt;&amp;1
-
-        
-                          
-    </command>
-    <inputs>
-        <conditional name="referenceSource">
-          <param name="CCDSsource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" help="Built-ins were indexed using default options">
-            <option value="indexed">Use a built-in reference</option>
-            <option value="history">Use reference from the history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference dataset" help="If your reference of interest is not listed, contact the Galaxy team">
-              <options from_data_table="IsoEM" />
-            </param>
-          </when>
-          <when value="history">
-            <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select CCDS fasta file from your history" />
-          </when>  <!-- history -->
-        </conditional>  <!-- referenceSource -->
-        <conditional name="Data">
-<!--
-            <param name="sPaired" type="select" label="Is this library Single-end or Paired-end?">
-              <option value="single">Single-end</option>
-              <option value="paired">Paired-end</option>
-            </param>
--->
-            <param name="RNAseqType" type="select" label="Select RNA-seq type">
-	        <option value="Ion-Torrent-Proton">Ion Torrent single-end</option>
-        	<option value="Illumina-paired-end">Illumina paired-end</option>
-	        <option value="Illumina-single-end">Illumina single-end</option>
-            </param>  <!-- RNAseqType -->
-            <when value="Illumina-paired-end">
-              <param name="input1" type="data" label="RNA-Seq file1, fastq or bam format" />
-              <param name="input2" type="data" label="RNA-Seq file2, fastq or bam format" /> 
-	    </when>
-            <when value="Ion-Torrent-Proton">
-	      <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
-              <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
-	      <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> 
-            </when>
-            <when value="Illumina-single-end">
-	      <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
-              <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
-	      <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> 
-            </when>
-        </conditional>  <!-- Data -->
-<!--
-        <param name="RNAseqType" type="select" label="Select RNA-seq type">
-           <option value="Ion-Torrent-Proton">Ion Torrent Proton</option>
-           <option value="Illumina-paired-end">Illumina paired-end</option>
-           <option value="Illumina-single-end">Illumina single-end</option>
-        </param>  
--->
-    </inputs>
-    <outputs>
-        <data name="out_gene_fpkm" format="tabular" label="Gene_fpkm"/>
-    	<data name="out_gene_tpm" format="tabular" label="Gene_tpm"/>
-    	<data name="out_iso_fpkm" format="tabular" label="Iso_fpkm"/>
-    	<data name="out_iso_tpm" format="tabular" label="Iso_tpm"/>
-	<data name="out_bootstrap" format="toolshed.gz" label="Bootstrap.tar.gz"/>
-        <data name="Run" format="log"  label="isoem_wrapper: The log file" />
-    </outputs>
-<help>
-**What it does**
-
-* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data. 
-
-**Input Format**
-
-* The tool accept the fastq, fastq.gz, bam formats. Extension must be specified at the end of the file names.
-* RNA-seq data must be Ion Torrent Proton or Illumina sequncing data.
-
------
-
-
-**BUILT-IN REFERENCE documentation**
-
-**mm10_C57BL/6:** 
-
-* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF
-* TMAP_index:/import1/tmap-index/tmap3.4.1/mm10/CCDS_nucleotide.20140407.fna
-* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407
-* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt
-
-**mm10_BALB/c:**
-
-* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF
-* TMAP_index: /import1/tmap-index/tmap3.4.1/mm10/mm10_CCDS_nucleotide.20140407_BALBc.fna
-* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407_BALBc
-* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt
-
-**hg19**
-
-* GTF file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS_nucleotide.20131129.fa.GTF
-* TMAP_index: /import1/tmap-index/tmap3.4.1/hg19/hg19_CCDS_nucleotide.20131129.fa
-* HISAT2_index: /import1/hisat2-index/hg19/hg19_CCDS_nucleotide.20131129.fna
-* Cluster file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS.20131129_transcriptID_geneName.txt
-
-**hg38**
-
-* GTF file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS_nucleotide.20150512.fna.GTF
-* TMAP_index: /import1/tmap-index/tmap3.4.1/hg38/hg38_CCDS_nucleotide.20150512.fna
-* HISAT2_index: /import1/hisat2-index/hg38_CCDS_downloadedRef/h19_CCDS_nucleotide.20150512.fna
-* Cluster file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS.20150512_transcriptID_geneName.txt
-	
------
-
-**Output Format**
-
-* Four output files containinag results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have identical format with the following fields.
-
-
-* 1 Gene/Isoform ID 
-* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads) 
-* 3 Min FPKM/TPM
-* 4 Max FPKM/TPM
-
-* And one compressed **Bootstrap.tar** file will be used in IsoDE2 to compute gene differential expression.
-</help>
-
-
-</tool>
-
-
-
-
--- a/tool_data_table_conf.xml	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
-<tables>
-    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
-    <table name="IsoEM" comment_char="#">
-        <columns>value, GTF, CCDS_INDEX, Cluster </columns>
-        <file path="tool-data/IsoEM.loc" />
-    </table>
-
-</tables>
-
--- a/tool_data_table_conf.xml.sample	Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
-<tables>
-    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
-    <table name="IsoEM" comment_char="#">
-        <columns>value, GTF, CCDS_INDEX, Cluster </columns>
-        <file path="tool-data/IsoEM.loc" />
-    </table>
-
-</tables>
-