# HG changeset patch
# User saharlcc
# Date 1489686243 14400
# Node ID 78d03bf22a1f534ba4b75ad6799eac57fa309430
# Parent ce0a125b3cd16cba2ecee3f13bade6d3e03e25a1
- Add prinseq command to filter RNA-Seq data
- Fix in interpreting p-value when replicates are used
diff -r ce0a125b3cd1 -r 78d03bf22a1f FC_Filter_IsoDE_wrapper.sh
--- a/FC_Filter_IsoDE_wrapper.sh Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-tempDir=/galaxy-prod/tmp
-
-inputfile=${1}
-inputfile2=${2}
-outputfile=${3}
-inputFC=${4}
-
-
-if [ "${inputfile2}" == "1" ]
-then
- echo
- awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile
-#exit;
-
-elif [ "${inputfile2}" == "2" ]
-then
- awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile
-else
- awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile
-fi
-
-echo "done"
-date
-
-
-
-
-
-#logx(y) = logn(y)/logn(x)
-
-#The NR==1 condition makes sure the file header gets printed
\ No newline at end of file
diff -r ce0a125b3cd1 -r 78d03bf22a1f FC_Filter_IsoDE_wrapper.xml
--- a/FC_Filter_IsoDE_wrapper.xml Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-
- Filters IsoDE2 output based on fold change
-
- FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This filter selects from the IsoDE2 output file, genes that genes that have fold change (ratio between the gene expression in the two conditions in comparison) greater
-than or equal to the a certain threshold set by the user.
-
-
-**Input**
-
-* 1- IsoDE2 output file. It can be any of the output files generated by an IsoDE2 runs; for genes or isoforms/FPKM or TPM
-* 2- Direction of over expression of interest to the user (genes over expressed in condition1, over expressed in condition 2, or overexpressed in either condition)
-* 3- Minimim fold change of interest
-*
-*
-
-
-
-
-**Output**
-
-
-The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the used.
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r ce0a125b3cd1 -r 78d03bf22a1f IsoEM.loc
--- a/IsoEM.loc Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2
-
-#NOTE: All entries in this file MUST be tab-delimited
-
-#Every entry has the following 5 fields:
-#Reference name
-
-
-
-mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-
\ No newline at end of file
diff -r ce0a125b3cd1 -r 78d03bf22a1f IsoEM.loc.sample
--- a/IsoEM.loc.sample Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2
-
-#NOTE: All entries in this file MUST be tab-delimited
-
-#Every entry has the following 5 fields:
-#Reference name
-
-
-
-mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
-
\ No newline at end of file
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoDE.xml
--- a/isoDE.xml Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-
- Compute gene Differential Expression based on IsoEM2 output
- isoDE2.sh
- -c1
- $condition1
- #for $r in $condition1replicates
- ${r.c1Rep}
- #end for
- -c2
- $condition2
- #for $r in $condition2replicates
- ${r.c2Rep}
- #end for
- -pval $pval
- -geneFPKMout $geneFPKM
- -geneTPMout $geneTPM
- -isoFPKMout $isoformFPKM
- -isoTPMout $isoformTPM
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Computes gene and isoform differential expression between two conditions (example tumor and normal) for both Fragment per Kilobase of transcript length per Million
-bases (FPKM) and Transcripts per Million (TPM) values. The computation is based on the boostraping output generated by IsoEM2. The number of bootstrap iterations for
-IsoEM2 should be >= 20 (suggested 200).
-
-**Input**
-
-* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicated for either condition
-* - Desired p-value to for which a reliable fold change level will be reported
-*
-
-
-
-
-**Output**
-
-* four output files containinag results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have identical format with the following fields
-* 1- Gene/isoform ID
-* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2.
-* For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared condition 1 is
-* at least 2 ^ absoulte value of this field.The sign indicates the direction, +ve means over expressed in condition 2, -ve means underexpressed in
-* condition 1. NDE indicates that no change was detected.
-* 3- log_2(condition 1 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping
-* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping
-* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping
-
-
-
-
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoDE2.sh
--- a/isoDE2.sh Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-#!/bin/bash
-
-isoEMDir=/home/projects/isoem2/isoem-workingversion
-toolpath=${isoEMDir}/bin
-isoDEPath=${isoEMDir}/bin
-supportCalcPath=${isoEMDir}/src/calc
-fpkmGeneCommand=""
-fpkmIsoformCommand=""
-tpmGeneCommand=""
-tpmIsoformCommand=""
-
-
-arg=($*)
-i=0
-while [ $i -lt $# ]
-do
- a=${arg[i]}
- if [ "$a" == "-c1" ]; then
- fpkmGeneCommand="$fpkmGeneCommand -c1"
- fpkmIsoformCommand="$fpkmIsoformCommand -c1"
- tpmGeneCommand="$tpmGeneCommand -c1"
- tpmIsoformCommand="$tpmIsoformCommand -c1"
-
- ((i++))
- a=${arg[i]}
- rep=1
- while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]]
- do
- condition1File=$a
- ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep}
-
- fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G"
- fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I"
- tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G"
- tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I"
- ((rep++))
-
- ((i++))
- a=${arg[i]}
- done
- elif [ "$a" == "-c2" ]; then
- fpkmGeneCommand="$fpkmGeneCommand -c2"
- fpkmIsoformCommand="$fpkmIsoformCommand -c2"
- tpmGeneCommand="$tpmGeneCommand -c2"
- tpmIsoformCommand="$tpmIsoformCommand -c2"
-
- ((i++))
- a=${arg[i]}
- rep=1
- while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]]
- do
- condition1File=$a
- #echo $condition1File
- ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep}
-
- fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G"
- fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I"
- tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G"
- tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I"
- ((rep++))
-
- ((i++))
- a=${arg[i]}
- done
- elif [ "$a" == "-pval" ]; then
- ((i++))
- pval=${arg[i]}
- ((i++))
-
-
- elif [ "$a" == "-geneFPKMout" ]; then
- ((i++))
- geneFPKMout_file=${arg[i]}
- ((i++))
-
-
- elif [ "$a" == "-geneTPMout" ]; then
- ((i++))
- geneTPMout_file=${arg[i]}
- ((i++))
-
- elif [ "$a" == "-isoFPKMout" ]; then
- ((i++))
- isoFPKMout_file=${arg[i]}
- ((i++))
-
- elif [ "$a" == "-isoTPMout" ]; then
- ((i++))
- isoTPMout_file=${arg[i]}
- ((i++))
- else
-
- ((i++))
- fi
-
-done
-
-support=`java -cp ${supportCalcPath} support 200 200 $pval`
-fpkmGeneCommand="$fpkmGeneCommand -b $support"
-fpkmIsoformCommand="$fpkmIsoformCommand -b $support"
-tpmGeneCommand="$tpmGeneCommand -b $support"
-tpmIsoformCommand="$tpmIsoformCommand -b $support"
-
-fpkmGeneCommand="$fpkmGeneCommand -dfc 2"
-fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2"
-tpmGeneCommand="$tpmGeneCommand -dfc 2"
-tpmIsoformCommand="$tpmIsoformCommand -dfc 2"
-
-
-#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then
-# pth=`pwd`
-# out_prefix=${pth}/${out_prefix}
-#fi
-
-
-echo GENE FPKM
-echo moving start
-date
-
-mkdir fpkm_G
-cd fpkm_G
-mv ../c*_fpkm_G .
-
-
-echo isoDE start
-date
-
-
-${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt"
-#awk '{for (f=1; f<=NF; f++) {if (f == NF) printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file}
-
-echo awk command
-date
-
-
-awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file}
-
-cd ..
-
-
-echo ISOFORM FPKM
-
-echo moving start
-date
-
-mkdir fpkm_I
-cd fpkm_I
-mv ../c*_fpkm_I .
-
-
-echo isoDE start
-date
-
-${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt"
-
-echo awk command
-date
-pwd
-awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoFPKMout_file}
-cd ..
-
-
-echo ISOFORM TPM
-echo moving start
-date
-
-mkdir tpm_G
-cd tpm_G
-mv ../c*_tpm_G .
-
-echo isoDE start
-date
-
-${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt"
-
-echo awk command
-date
-pwd
-awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneTPMout_file}
-cd ..
-
-
-echo ISOFORM TPM
-echo moving start
-date
-
-mkdir tpm_I
-cd tpm_I
-mv ../c*_tpm_I .
-
-echo isoDE start
-date
-
-${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt"
-
-echo awk command
-date
-
-awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoTPMout_file}
-
-cd ..
-
-
-echo final cleanup
-date
-rm -fr fpkm_G fpkm_I tpm_G cd tpm_I
-echo done
-date
-
-
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/FC_Filter_IsoDE_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.sh Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+tempDir=/galaxy-prod/tmp
+
+inputfile=${1}
+inputfile2=${2}
+outputfile=${3}
+inputFC=${4}
+
+
+if [ "${inputfile2}" == "1" ]
+then
+ echo
+ awk -v FC="${inputFC}" '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' "${inputfile}" > "${outputfile}"
+# Direction 1 keeps rows with column 2 <= -log2(FC); direction 2 keeps rows with column 2 >= log2(FC);
+# any other direction keeps both. Paths and FC are quoted so values containing whitespace survive word splitting.
+elif [ "${inputfile2}" == "2" ]
+then
+ awk -v FC="${inputFC}" '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' "${inputfile}" > "${outputfile}"
+else
+ awk -v FC="${inputFC}" '{if (NR == 1 || ($2 != "NDE" && ($2 <= -(log(FC)/log(2)) || $2 >= (log(FC)/log(2))))) print $0}' "${inputfile}" > "${outputfile}"
+fi
+
+echo "done"
+date
+
+
+
+
+
+#logx(y) = logn(y)/logn(x)
+
+#The NR==1 condition makes sure the file header gets printed
\ No newline at end of file
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/FC_Filter_IsoDE_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.xml Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,58 @@
+
+ Filters IsoDE2 output based on fold change
+
+ FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This filter selects, from the IsoDE2 output file, the genes that have a fold change (ratio between the gene expression in the two conditions in comparison) greater
+than or equal to a certain threshold set by the user.
+
+
+**Input**
+
+* 1- IsoDE2 output file. It can be any of the output files generated by an IsoDE2 run; for genes or isoforms/FPKM or TPM
+* 2- Direction of over expression of interest to the user (genes over expressed in condition 1, over expressed in condition 2, or overexpressed in either condition)
+* 3- Minimum fold change of interest
+*
+*
+
+
+
+
+**Output**
+
+
+The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the user.
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/IsoEM.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/IsoEM.loc Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,12 @@
+#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2
+
+#NOTE: All entries in this file MUST be tab-delimited
+
+#Every entry has the following 5 fields:
+#Reference name
+
+
+
+mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+
\ No newline at end of file
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/IsoEM.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/IsoEM.loc.sample Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,12 @@
+#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2
+
+#NOTE: All entries in this file MUST be tab-delimited
+
+#Every entry has the following 5 fields:
+#Reference name
+
+
+
+mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt
+
\ No newline at end of file
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/README.txt Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,11 @@
+To install IsoEM2, IsoDE2 and the FC filter
+
+1) Follow Galaxy instructions for installing a tool from the Tool Shed
+2) Download and install IsoEM2/IsoDE2 (https://github.com/mandricigor/isoem2)
+3) Install other dependencies:
+- tmap (needed for ION Torrent data)
+- hisat2 (needed for Illumina data)
+- prinseq
+- bedtools
+4) Edit isoem_wrapper.sh and isoDE2.sh. Change the tool paths to where they are installed on your system
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoDE.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoDE.xml Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,79 @@
+
+ Compute gene Differential Expression based on IsoEM2 output
+ isoDE2.sh
+ -c1
+ $condition1
+ #for $r in $condition1replicates
+ ${r.c1Rep}
+ #end for
+ -c2
+ $condition2
+ #for $r in $condition2replicates
+ ${r.c2Rep}
+ #end for
+ -pval $pval
+ -geneFPKMout $geneFPKM
+ -geneTPMout $geneTPM
+ -isoFPKMout $isoformFPKM
+ -isoTPMout $isoformTPM
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Computes gene and isoform differential expression between two conditions (for example, tumor and normal) for both Fragments per Kilobase of transcript length per Million
+bases (FPKM) and Transcripts per Million (TPM) values. The computation is based on the bootstrapping output generated by IsoEM2. The number of bootstrap iterations for
+IsoEM2 should be >= 20 (suggested 200).
+
+**Input**
+
+* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicates for either condition
+* - Desired p-value for which a reliable fold change level will be reported
+*
+
+
+
+
+**Output**
+
+* Four output files containing results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have identical format with the following fields
+* 1- Gene/isoform ID
+* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2.
+* For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared to condition 1 is
+* at least 2 ^ absolute value of this field. The sign indicates the direction: +ve means over expressed in condition 2, -ve means over expressed in
+* condition 1. 0 indicates that no change was detected.
+* 3- log_2(condition 2 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping
+* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping
+* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping
+
+
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoDE2.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoDE2.sh Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+isoEMDir=/galaxy/galaxy/tools-dependencies/bin/isoem2/
+toolpath=${isoEMDir}/bin
+isoDEPath=${isoEMDir}/bin
+supportCalcPath=${isoEMDir}/src/calc
+fpkmGeneCommand=""
+fpkmIsoformCommand=""
+tpmGeneCommand=""
+tpmIsoformCommand=""
+
+numberOfBootstrapIterationsPerSample=199
+
+arg=($*)
+i=0
+while [ $i -lt $# ]
+do
+ a=${arg[i]}
+ if [ "$a" == "-c1" ]; then
+ fpkmGeneCommand="$fpkmGeneCommand -c1"
+ fpkmIsoformCommand="$fpkmIsoformCommand -c1"
+ tpmGeneCommand="$tpmGeneCommand -c1"
+ tpmIsoformCommand="$tpmIsoformCommand -c1"
+
+ ((i++))
+ a=${arg[i]}
+ rep=1
+ while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]]
+ do
+ condition1File=$a
+ ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep}
+
+ fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G"
+ fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I"
+ tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G"
+ tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I"
+ ((rep++))
+
+ ((i++))
+ a=${arg[i]}
+ done
+ elif [ "$a" == "-c2" ]; then
+ fpkmGeneCommand="$fpkmGeneCommand -c2"
+ fpkmIsoformCommand="$fpkmIsoformCommand -c2"
+ tpmGeneCommand="$tpmGeneCommand -c2"
+ tpmIsoformCommand="$tpmIsoformCommand -c2"
+
+ ((i++))
+ a=${arg[i]}
+ rep=1
+ while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]]
+ do
+ condition1File=$a
+ #echo $condition1File
+ ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep}
+
+ fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G"
+ fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I"
+ tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G"
+ tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I"
+ ((rep++))
+
+ ((i++))
+ a=${arg[i]}
+ done
+ elif [ "$a" == "-pval" ]; then
+ ((i++))
+ pval=${arg[i]}
+ ((i++))
+
+
+ elif [ "$a" == "-geneFPKMout" ]; then
+ ((i++))
+ geneFPKMout_file=${arg[i]}
+ ((i++))
+
+
+ elif [ "$a" == "-geneTPMout" ]; then
+ ((i++))
+ geneTPMout_file=${arg[i]}
+ ((i++))
+
+ elif [ "$a" == "-isoFPKMout" ]; then
+ ((i++))
+ isoFPKMout_file=${arg[i]}
+ ((i++))
+
+ elif [ "$a" == "-isoTPMout" ]; then
+ ((i++))
+ isoTPMout_file=${arg[i]}
+ ((i++))
+ else
+
+ ((i++))
+ fi
+
+done
+((rep--))
+bootstrap=$(($rep*$numberOfBootstrapIterationsPerSample))
+echo boostrap iterations $bootstrap
+#support=`java -cp ${supportCalcPath} support 200 200 $pval`
+#support=`java -cp ${supportCalcPath} support $bootstrap $bootstrap $pval`
+#The support calculator assumes IsoDE compares all pairs when calculating the number of ratios: it multiplies the number of bootstrap samples per condition.
+#The second parameter was changed to 1 so that the number of ratios equals the number of bootstrap samples (matched pairs, not all pairs).
+echo calculate support based on p-value and number or replicates
+support=`java -cp ${supportCalcPath} support $bootstrap 1 $pval`
+fpkmGeneCommand="$fpkmGeneCommand -b $support"
+fpkmIsoformCommand="$fpkmIsoformCommand -b $support"
+tpmGeneCommand="$tpmGeneCommand -b $support"
+tpmIsoformCommand="$tpmIsoformCommand -b $support"
+
+fpkmGeneCommand="$fpkmGeneCommand -dfc 2"
+fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2"
+tpmGeneCommand="$tpmGeneCommand -dfc 2"
+tpmIsoformCommand="$tpmIsoformCommand -dfc 2"
+
+
+#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then
+# pth=`pwd`
+# out_prefix=${pth}/${out_prefix}
+#fi
+
+
+echo GENE FPKM
+echo moving start
+date
+
+mkdir fpkm_G
+cd fpkm_G
+mv ../c*_fpkm_G .
+
+
+echo isoDE start
+date
+
+
+${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt"
+#awk '{for (f=1; f<=NF; f++) {if (f == NF) printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file}
+
+echo awk command
+date
+
+
+awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file}
+
+cd ..
+
+
+echo ISOFORM FPKM
+
+echo moving start
+date
+
+mkdir fpkm_I
+cd fpkm_I
+mv ../c*_fpkm_I .
+
+
+echo isoDE start
+date
+
+${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt"
+
+echo awk command
+date
+pwd
+awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoFPKMout_file}
+cd ..
+
+
+echo GENE TPM
+echo moving start
+date
+# Move the c*_tpm_G inputs into a dedicated working directory before running isodecalls there.
+mkdir tpm_G
+cd tpm_G
+mv ../c*_tpm_G .
+
+echo isoDE start
+date
+
+${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt"
+# Replace the first row with a header, then emit columns 1, 2, 5, 6, 7 of each row; column 2 is forced to "0" when columns 6 and 7 are both 0.
+echo awk command
+date
+pwd
+awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneTPMout_file}
+cd ..
+
+
+echo ISOFORM TPM
+echo moving start
+date
+
+mkdir tpm_I
+cd tpm_I
+mv ../c*_tpm_I .
+
+echo isoDE start
+date
+
+${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt"
+
+echo awk command
+date
+
+awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoTPMout_file}
+
+cd ..
+
+# Remove the four per-metric working directories (a stray "cd" operand used to be in this list).
+echo final cleanup
+date
+rm -fr fpkm_G fpkm_I tpm_G tpm_I
+echo done
+date
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoem_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoem_wrapper.sh Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,367 @@
+#!/bin/bash
+
+
+echo $@
+echo pwd
+pwd
+toolpath=/galaxy-prod/galaxy/tools-dependencies
+hisat2Path=${toolpath}/bin
+tmapPath=${toolpath}/bin
+prinseqPath=${toolpath}/bin
+bedtoolsPath=${toolpath}/bin
+
+
+#tmapPath=/usr/local/bin
+#bedtoolsPath=/usr/bin
+#hisat2Path=/usr/local/bin
+
+tempDir=/galaxy-prod/tmp
+
+
+isoem2Path=${toolpath}/bin/isoem2/bin
+
+#exit;
+
+arg=($*)
+i=0
+for a in ${arg[*]}
+do
+((i++))
+ if [ "$a" == "--input1" ]; then
+ RNAseq_1=${arg[i]}
+ fi
+
+ if [ "$a" == "--input2" ]; then
+ RNAseq_2=${arg[i]}
+ fi
+
+ if [ "$a" == "--GTF" ]; then
+ GTF_file=${arg[i]}
+ fi
+
+ if [ "$a" == "--TMAP_INDEX" ]; then
+ TMAP_INDEX_file=${arg[i]}
+ fi
+
+ if [ "$a" == "--HISAT2_INDEX" ]; then
+ HISAT2_INDEX_file=${arg[i]}
+ fi
+
+ if [ "$a" == "--Cluster" ]; then
+ Cluster_file=${arg[i]}
+ fi
+
+ if [ "$a" == "-m" ]; then
+ M=${arg[i]}
+ fi
+
+ if [ "$a" == "-d" ]; then
+ D=${arg[i]}
+ fi
+
+ if [ "$a" == "--out_gene_fpkm" ]; then
+ out_gene_fpkm=${arg[i]}
+ fi
+
+ if [ "$a" == "--out_gene_tpm" ]; then
+ out_gene_tpm=${arg[i]}
+ fi
+
+ if [ "$a" == "--out_iso_fpkm" ]; then
+ out_iso_fpkm=${arg[i]}
+ fi
+
+ if [ "$a" == "--out_iso_tpm" ]; then
+ out_iso_tpm=${arg[i]}
+ fi
+
+ if [ "$a" == "--out_bootstrap" ]; then
+ out_bootstrap=${arg[i]}
+ fi
+
+ if [ "$a" == "--RNA_type" ]; then
+ RNAseqType=${arg[i]}
+ fi
+
+ if [ "$a" == "--fastaFile" ]; then
+ FastaFile=${arg[i]}
+ fi
+
+ if [ "$a" == "--MinReadLength" ]; then
+ MinReadLengthNum=${arg[i]}
+ fi
+
+done
+
+
+
+
+if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
+then
+ echo ${TMAP_INDEX_file}
+ echo Align the RNAseq_sample fastq to transcriptome using TMAP
+
+ f=$(basename ${RNAseq_1})
+# file_type=`echo $f | tail -c 9`
+
+# if [ "$file_type" == "fastq.gz" ]; then
+
+# echo "Unzip fastq files"
+
+# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+# fi
+
+ file_type=`echo $f | tail -c 6`
+ echo file type $file_type
+
+ if [ "$file_type" == "fastq" ]; then
+
+ #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
+ cp ${RNAseq_1} RNAseq_1.fastq
+ fi
+
+ file_type=`echo $f | tail -c 4`
+
+ if [ "$file_type" == "bam" ]; then
+
+ echo "Convert BAM to fastq"
+
+ ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+
+
+# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+ fi
+
+ echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+
+ echo filter the RNA fastq QC less than 20 and duplicates
+ perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_len ${MinReadLengthNum} -min_qual_mean 20
+
+# rm RNA_1.fastq
+
+ echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
+
+ ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam
+
+
+elif [ "${RNAseqType}" == "Ion-Torrent-Proton-mm9" ]
+then
+ echo ${TMAP_INDEX_file}
+ echo Align the RNAseq_sample fastq to transcriptome using TMAP
+
+ f=$(basename ${RNAseq_1})
+# file_type=`echo $f | tail -c 9`
+
+# if [ "$file_type" == "fastq.gz" ]; then
+
+# echo "Unzip fastq files"
+
+# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+# fi
+
+ file_type=`echo $f | tail -c 6`
+ echo file type $file_type
+
+ if [ "$file_type" == "fastq" ]; then
+
+ #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
+ cp ${RNAseq_1} RNAseq_1.fastq
+ fi
+
+ file_type=`echo $f | tail -c 4`
+
+ if [ "$file_type" == "bam" ]; then
+
+ echo "Convert BAM to fastq"
+
+ ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+
+
+# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
+ fi
+
+ echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+
+ echo filter the RNA fastq QC less than 20 and duplicates
+ perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum}
+
+# rm RNA_1.fastq
+
+ echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
+
+ ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam
+
+
+elif [ "${RNAseqType}" == "Illumina-paired-end" ]
+then
+ f=$(basename ${RNAseq_1})
+# file_type=`echo $f | tail -c 9`
+
+# if [ "$file_type" == "fastq.gz" ]; then
+
+# echo "Unzip fastq files"
+
+# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+# gunzip -c ${RNAseq_2} > RNAseq_2.fastq
+# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
+# fi
+
+ file_type=`echo $f | tail -c 6`
+
+ if [ "$file_type" == "fastq" ]; then
+
+
+# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
+ cp ${RNAseq_1} RNAseq_1.fastq
+ cp ${RNAseq_2} RNAseq_2.fastq
+
+
+ fi
+
+ file_type=`echo $f | tail -c 4`
+
+ if [ "$file_type" == "bam" ]; then
+
+ echo "Convert BAM to fastq"
+
+ ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+ ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
+# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
+ fi
+
+
+ echo @@@@@@ Number of raw reads not paired in Ilumina RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq
+
+
+ echo filter the RNA fastq QC less than 20 and duplicates
+ perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum}
+
+ echo @@@@@@ Number of cleaned reads not paired in Ilumina RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq
+
+ ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNA_QC_dup_1.fastq -2 RNA_QC_dup_2.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam
+
+else
+ f=$(basename ${RNAseq_1})
+# file_type=`echo $f | tail -c 9`
+
+# if [ "$file_type" == "fastq.gz" ]; then
+
+# echo "Unzip fastq files"
+
+# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
+# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam
+# fi
+
+ file_type=`echo $f | tail -c 6`
+ # tail -c 6 also counts echo's trailing newline, so this is the last 5 characters of the file name.
+ if [ "$file_type" == "fastq" ]; then
+ cp ${RNAseq_1} RNAseq_1.fastq
+
+ #${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
+ fi
+ file_type=`echo $f | tail -c 4`
+ if [ "$file_type" == "bam" ]; then
+ # The suffix is re-derived with 3 characters first: the 5-character value can never equal "bam".
+ echo "Convert BAM to fastq"
+
+ ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
+# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
+ fi
+
+ echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq
+
+ echo filter the RNA fastq QC less than 20 and duplicates
+ perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum}
+
+# rm RNA_1.fastq
+
+ echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@
+ python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq
+ ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNA_QC_dup.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam
+
+fi
+
+
+echo Sorting
+
+LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
+
+
+if [ "${RNAseqType}" == "Illumina-paired-end" ]
+then
+ echo IsoEM for RNAseq mapped to transcriptome
+ ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
+
+else
+ echo IsoEM for RNAseq mapped to transcriptome
+ ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam
+fi
+
+echo Join estimates files with ci files
+
+echo ls
+#ls ./aligned_reads_sorted/ -ltr
+# Join each estimates file with its confidence-interval file on the first field and keep the first four columns, tab-separated.
+join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci \
+ |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_fpkm
+join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
+join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
+join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
+
+
+#echo Adding output directory to bootstap archive
+#
+#echo ls
+#ls ./aligned_reads_sorted/ -ltr
+#
+#cd aligned_reads_sorted
+#echo ls
+#ls -ltrh
+#gunzip bootstrap.tar.gz
+#tar rf bootstrap.tar output
+#gzip bootstrap.tar
+mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
+
+
+#echo ls after gz
+#ls -ltr
+#
+#cd ..
+#pwd
+
+
+#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
+#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
+#gzip ./aligned_reads_sorted/bootstrap.tar
+
+echo ls after gz
+ls -ltr
+
+#4. Copy output files
+#############################################################
+mv gene_fpkm ${out_gene_fpkm}
+mv gene_tpm ${out_gene_tpm}
+mv iso_fpkm ${out_iso_fpkm}
+mv iso_tpm ${out_iso_tpm}
+
+#5.Remove files
+#############################################################
+rm RNAseq_transcriptome.sam
+rm aligned_reads_sorted.sam
+rm -rf aligned_reads_sorted
+
+echo "done"
+date
+
+
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoem_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/isoem_wrapper.xml Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,135 @@
+
+ Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data
+
+
+
+
+ isoem_wrapper.sh
+
+ ## Provide outputs.
+ --out_gene_fpkm $out_gene_fpkm
+ --out_gene_tpm $out_gene_tpm
+ --out_iso_fpkm $out_iso_fpkm
+ --out_iso_tpm $out_iso_tpm
+ --out_bootstrap $out_bootstrap
+
+ --MinReadLength $MinReadLength
+
+ ## Handle reference file .
+ #if $referenceSource.CCDSsource == "history":
+ --fastaFile $referenceSource.fastaFile
+ #else:
+ --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster
+ #end if
+
+ ## First input file always required fastq1.
+ --input1 $Data.input1
+
+ ## Set params based on whether reads are single-end or paired.
+ #if $Data.RNAseqType == "Illumina-paired-end":
+ --input2 $Data.input2
+ #else:
+ -m $Data.lengthMean
+ -d $Data.lengthSd
+ #end if
+
+ ## RNA-Seq type based on sequencing platform.
+ --RNA_type $Data.RNAseqType > $Run 2>&1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data.
+
+**Input Format**
+
+* The tool accepts the fastq, fastq.gz, and bam formats. The extension must be specified at the end of the file names.
+* RNA-seq data must be Ion Torrent Proton or Illumina sequencing data.
+
+-----
+
+
+**Output Format**
+
+* Four output files containing results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have an identical format with the following fields.
+
+
+* 1 Gene/Isoform ID
+* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads)
+* 3 Min FPKM/TPM
+* 4 Max FPKM/TPM
+
+* And one compressed **Bootstrap.tar** file that will be used in IsoDE2 to compute gene differential expression.
+
+
+
+
+
+
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/tool_data_table_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/tool_data_table_conf.xml Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,10 @@
+
+
+
+
+ value, GTF, CCDS_INDEX, Cluster
+
+
+
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isoem2_isode2/tool_data_table_conf.xml.sample Thu Mar 16 13:44:03 2017 -0400
@@ -0,0 +1,10 @@
+
+
+
+
+ value, GTF, CCDS_INDEX, Cluster
+
+
+
+
+
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem_wrapper.sh
--- a/isoem_wrapper.sh Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,256 +0,0 @@
-#!/bin/bash
-
-
-echo $@
-echo pwd
-pwd
-isoEMDir=/home/projects/isoem2/isoem-workingversion
-tmapPath=/usr/local/bin
-bedtoolsPath=/usr/local/bin
-hisat2Path=/usr/local/bin
-tempDir=/tmp
-
-
-isoem2Path=${isoEMDir}/bin
-
-#exit;
-
-arg=($*)
-i=0
-for a in ${arg[*]}
-do
-((i++))
- if [ "$a" == "--input1" ]; then
- RNAseq_1=${arg[i]}
- fi
-
- if [ "$a" == "--input2" ]; then
- RNAseq_2=${arg[i]}
- fi
-
- if [ "$a" == "--GTF" ]; then
- GTF_file=${arg[i]}
- fi
-
- if [ "$a" == "--TMAP_INDEX" ]; then
- TMAP_INDEX_file=${arg[i]}
- fi
-
- if [ "$a" == "--HISAT2_INDEX" ]; then
- HISAT2_INDEX_file=${arg[i]}
- fi
-
- if [ "$a" == "--Cluster" ]; then
- Cluster_file=${arg[i]}
- fi
-
- if [ "$a" == "-m" ]; then
- M=${arg[i]}
- fi
-
- if [ "$a" == "-d" ]; then
- D=${arg[i]}
- fi
-
- if [ "$a" == "--out_gene_fpkm" ]; then
- out_gene_fpkm=${arg[i]}
- fi
-
- if [ "$a" == "--out_gene_tpm" ]; then
- out_gene_tpm=${arg[i]}
- fi
-
- if [ "$a" == "--out_iso_fpkm" ]; then
- out_iso_fpkm=${arg[i]}
- fi
-
- if [ "$a" == "--out_iso_tpm" ]; then
- out_iso_tpm=${arg[i]}
- fi
-
- if [ "$a" == "--out_bootstrap" ]; then
- out_bootstrap=${arg[i]}
- fi
-
- if [ "$a" == "--RNA_type" ]; then
- RNAseqType=${arg[i]}
- fi
-
- if [ "$a" == "--fastaFile" ]; then
- FastaFile=${arg[i]}
- fi
-done
-
-
-
-if [ "${RNAseqType}" == "Ion-Torrent-Proton" ]
-then
- echo ${TMAP_INDEX_file}
- echo Align the RNAseq_sample fastq to transcriptome using TMAP
-
- f=$(basename ${RNAseq_1})
-# file_type=`echo $f | tail -c 9`
-
-# if [ "$file_type" == "fastq.gz" ]; then
-
-# echo "Unzip fastq files"
-
-# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
-# fi
-
- file_type=`echo $f | tail -c 6`
-
- if [ "$file_type" == "fastq" ]; then
-
- ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam
- fi
-
- file_type=`echo $f | tail -c 4`
-
- if [ "$file_type" == "bam" ]; then
-
- echo "Convert BAM to fastq"
-
- ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
- ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam
- fi
-
-
-elif [ "${RNAseqType}" == "Illumina-paired-end" ]
-then
- f=$(basename ${RNAseq_1})
-# file_type=`echo $f | tail -c 9`
-
-# if [ "$file_type" == "fastq.gz" ]; then
-
-# echo "Unzip fastq files"
-
-# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-# gunzip -c ${RNAseq_2} > RNAseq_2.fastq
-# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
-# fi
-
- file_type=`echo $f | tail -c 6`
-
- if [ "$file_type" == "fastq" ]; then
-
- ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
- fi
-
- file_type=`echo $f | tail -c 4`
-
- if [ "$file_type" == "bam" ]; then
-
- echo "Convert BAM to fastq"
-
- ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
- ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq
- ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
- fi
-
-
-else
- f=$(basename ${RNAseq_1})
-# file_type=`echo $f | tail -c 9`
-
-# if [ "$file_type" == "fastq.gz" ]; then
-
-# echo "Unzip fastq files"
-
-# gunzip -c ${RNAseq_1} > RNAseq_1.fastq
-# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
-# fi
-
- file_type=`echo $f | tail -c 6`
-
- if [ "$file_type" == "fastq" ]; then
-
- ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
- fi
-
- if [ "$file_type" == "bam" ]; then
-
- echo "Convert BAM to fastq"
-
- ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq
- ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam
- fi
-
-fi
-
-
-echo Sorting
-
-LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam
-
-
-if [ "${RNAseqType}" == "Illumina-paired-end" ]
-then
- echo IsoEM for RNAseq mapped to transcriptome
- ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam
-
-else
- echo IsoEM for RNAseq mapped to transcriptome
- ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam
-fi
-
-echo Join estimates files with ci files
-
-echo ls
-#ls ./aligned_reads_sorted/ -ltr
-
-join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333
-awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm
-join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm
-join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm
-join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm
-
-
-#echo Adding output directory to bootstap archive
-#
-#echo ls
-#ls ./aligned_reads_sorted/ -ltr
-#
-#cd aligned_reads_sorted
-#echo ls
-#ls -ltrh
-#gunzip bootstrap.tar.gz
-#tar rf bootstrap.tar output
-#gzip bootstrap.tar
-mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap}
-
-
-#echo ls after gz
-#ls -ltr
-#
-#cd ..
-#pwd
-
-
-#gunzip ./aligned_reads_sorted/bootstrap.tar.gz
-#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output
-#gzip ./aligned_reads_sorted/bootstrap.tar
-
-echo ls after gz
-ls -ltr
-
-#4. Copy output files
-#############################################################
-mv gene_fpkm ${out_gene_fpkm}
-mv gene_tpm ${out_gene_tpm}
-mv iso_fpkm ${out_iso_fpkm}
-mv iso_tpm ${out_iso_tpm}
-
-#5.Remove files
-#############################################################
-rm RNAseq_transcriptome.sam
-rm aligned_reads_sorted.sam
-rm -rf aligned_reads_sorted
-
-echo "done"
-date
-
-
-
-
diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem_wrapper.xml
--- a/isoem_wrapper.xml Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-
- Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data
-
-
-
-
- isoem_wrapper.sh
-
- ## Provide outputs.
- --out_gene_fpkm $out_gene_fpkm
- --out_gene_tpm $out_gene_tpm
- --out_iso_fpkm $out_iso_fpkm
- --out_iso_tpm $out_iso_tpm
- --out_bootstrap $out_bootstrap
-
- ## Handle reference file .
- #if $referenceSource.CCDSsource == "history":
- --fastaFile $referenceSource.fastaFile
- #else:
- --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster
- #end if
-
- ## First input file always required fastq1.
- --input1 $Data.input1
-
- ## Set params based on whether reads are single-end or paired.
- #if $Data.RNAseqType == "Illumina-paired-end":
- --input2 $Data.input2
- #else:
- -m $Data.lengthMean
- -d $Data.lengthSd
- #end if
-
- ## RNA-Seq type based on sequencing platform.
- --RNA_type $Data.RNAseqType > $Run 2>&1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data.
-
-**Input Format**
-
-* The tool accept the fastq, fastq.gz, bam formats. Extension must be specified at the end of the file names.
-* RNA-seq data must be Ion Torrent Proton or Illumina sequncing data.
-
------
-
-
-**BUILT-IN REFERENCE documentation**
-
-**mm10_C57BL/6:**
-
-* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF
-* TMAP_index:/import1/tmap-index/tmap3.4.1/mm10/CCDS_nucleotide.20140407.fna
-* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407
-* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt
-
-**mm10_BALB/c:**
-
-* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF
-* TMAP_index: /import1/tmap-index/tmap3.4.1/mm10/mm10_CCDS_nucleotide.20140407_BALBc.fna
-* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407_BALBc
-* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt
-
-**hg19**
-
-* GTF file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS_nucleotide.20131129.fa.GTF
-* TMAP_index: /import1/tmap-index/tmap3.4.1/hg19/hg19_CCDS_nucleotide.20131129.fa
-* HISAT2_index: /import1/hisat2-index/hg19/hg19_CCDS_nucleotide.20131129.fna
-* Cluster file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS.20131129_transcriptID_geneName.txt
-
-**hg38**
-
-* GTF file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS_nucleotide.20150512.fna.GTF
-* TMAP_index: /import1/tmap-index/tmap3.4.1/hg38/hg38_CCDS_nucleotide.20150512.fna
-* HISAT2_index: /import1/hisat2-index/hg38_CCDS_downloadedRef/h19_CCDS_nucleotide.20150512.fna
-* Cluster file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS.20150512_transcriptID_geneName.txt
-
------
-
-**Output Format**
-
-* Four output files containinag results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have identical format with the following fields.
-
-
-* 1 Gene/Isoform ID
-* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads)
-* 3 Min FPKM/TPM
-* 4 Max FPKM/TPM
-
-* And one compressed **Bootstrap.tar** file will be used in IsoDE2 to compute gene differential expression.
-
-
-
-
-
-
-
-
diff -r ce0a125b3cd1 -r 78d03bf22a1f tool_data_table_conf.xml
--- a/tool_data_table_conf.xml Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-
-
- value, GTF, CCDS_INDEX, Cluster
-
-
-
-
-
diff -r ce0a125b3cd1 -r 78d03bf22a1f tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Mon Sep 19 22:10:01 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-
-
- value, GTF, CCDS_INDEX, Cluster
-
-
-
-
-