# HG changeset patch # User saharlcc # Date 1489686243 14400 # Node ID 78d03bf22a1f534ba4b75ad6799eac57fa309430 # Parent ce0a125b3cd16cba2ecee3f13bade6d3e03e25a1 - Add prinseq command to filter RNA-Seq data - Fix in interpreting p-value when replicates are used diff -r ce0a125b3cd1 -r 78d03bf22a1f FC_Filter_IsoDE_wrapper.sh --- a/FC_Filter_IsoDE_wrapper.sh Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -#!/bin/bash - -tempDir=/galaxy-prod/tmp - -inputfile=${1} -inputfile2=${2} -outputfile=${3} -inputFC=${4} - - -if [ "${inputfile2}" == "1" ] -then - echo - awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile -#exit; - -elif [ "${inputfile2}" == "2" ] -then - awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile -else - awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile -fi - -echo "done" -date - - - - - -#logx(y) = logn(y)/logn(x) - -#The NR==1 condition makes sure the file header gets printed \ No newline at end of file diff -r ce0a125b3cd1 -r 78d03bf22a1f FC_Filter_IsoDE_wrapper.xml --- a/FC_Filter_IsoDE_wrapper.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ - - Filters IsoDE2 output based on fold change - - FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min - - - - - - - - - - - - - - - - - - -**What it does** - -This filter selects from the IsoDE2 output file, genes that genes that have fold change (ratio between the gene expression in the two conditions in comparison) greater -than or equal to the a certain threshold set by the user. - - -**Input** - -* 1- IsoDE2 output file. 
It can be any of the output files generated by an IsoDE2 runs; for genes or isoforms/FPKM or TPM -* 2- Direction of over expression of interest to the user (genes over expressed in condition1, over expressed in condition 2, or overexpressed in either condition) -* 3- Minimim fold change of interest -* -* - - - - -**Output** - - -The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the used. - - - - - - - - - - - - - diff -r ce0a125b3cd1 -r 78d03bf22a1f IsoEM.loc --- a/IsoEM.loc Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 - -#NOTE: All entries in this file MUST be tab-delimited - -#Every entry has the following 5 fields: -#Reference name - - - -mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt -hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt - \ No newline at end of file diff -r ce0a125b3cd1 -r 78d03bf22a1f IsoEM.loc.sample --- a/IsoEM.loc.sample Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 - -#NOTE: All entries in this file MUST be tab-delimited - -#Every entry has the following 5 fields: -#Reference name - - - -mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf 
/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt -hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt - \ No newline at end of file diff -r ce0a125b3cd1 -r 78d03bf22a1f isoDE.xml --- a/isoDE.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ - - Compute gene Differential Expression based on IsoEM2 output - isoDE2.sh - -c1 - $condition1 - #for $r in $condition1replicates - ${r.c1Rep} - #end for - -c2 - $condition2 - #for $r in $condition2replicates - ${r.c2Rep} - #end for - -pval $pval - -geneFPKMout $geneFPKM - -geneTPMout $geneTPM - -isoFPKMout $isoformFPKM - -isoTPMout $isoformTPM - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Computes gene and isoform differential expression between two conditions (example tumor and normal) for both Fragment per Kilobase of transcript length per Million -bases (FPKM) and Transcripts per Million (TPM) values. The computation is based on the boostraping output generated by IsoEM2. The number of bootstrap iterations for -IsoEM2 should be >= 20 (suggested 200). - -**Input** - -* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicated for either condition -* - Desired p-value to for which a reliable fold change level will be reported -* - - - - -**Output** - -* four output files containinag results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. 
The four files have identical format with the following fields -* 1- Gene/isoform ID -* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2. -* For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared condition 1 is -* at least 2 ^ absoulte value of this field.The sign indicates the direction, +ve means over expressed in condition 2, -ve means underexpressed in -* condition 1. NDE indicates that no change was detected. -* 3- log_2(condition 1 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping -* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping -* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping - - - - diff -r ce0a125b3cd1 -r 78d03bf22a1f isoDE2.sh --- a/isoDE2.sh Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -#!/bin/bash - -isoEMDir=/home/projects/isoem2/isoem-workingversion -toolpath=${isoEMDir}/bin -isoDEPath=${isoEMDir}/bin -supportCalcPath=${isoEMDir}/src/calc -fpkmGeneCommand="" -fpkmIsoformCommand="" -tpmGeneCommand="" -tpmIsoformCommand="" - - -arg=($*) -i=0 -while [ $i -lt $# ] -do - a=${arg[i]} - if [ "$a" == "-c1" ]; then - fpkmGeneCommand="$fpkmGeneCommand -c1" - fpkmIsoformCommand="$fpkmIsoformCommand -c1" - tpmGeneCommand="$tpmGeneCommand -c1" - tpmIsoformCommand="$tpmIsoformCommand -c1" - - ((i++)) - a=${arg[i]} - rep=1 - while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] - do - condition1File=$a - ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep} - - fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G" - fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I" - tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G" - tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I" - ((rep++)) - - ((i++)) - a=${arg[i]} - done - elif [ "$a" == "-c2" ]; then - fpkmGeneCommand="$fpkmGeneCommand -c2" - 
fpkmIsoformCommand="$fpkmIsoformCommand -c2" - tpmGeneCommand="$tpmGeneCommand -c2" - tpmIsoformCommand="$tpmIsoformCommand -c2" - - ((i++)) - a=${arg[i]} - rep=1 - while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] - do - condition1File=$a - #echo $condition1File - ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep} - - fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G" - fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I" - tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G" - tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I" - ((rep++)) - - ((i++)) - a=${arg[i]} - done - elif [ "$a" == "-pval" ]; then - ((i++)) - pval=${arg[i]} - ((i++)) - - - elif [ "$a" == "-geneFPKMout" ]; then - ((i++)) - geneFPKMout_file=${arg[i]} - ((i++)) - - - elif [ "$a" == "-geneTPMout" ]; then - ((i++)) - geneTPMout_file=${arg[i]} - ((i++)) - - elif [ "$a" == "-isoFPKMout" ]; then - ((i++)) - isoFPKMout_file=${arg[i]} - ((i++)) - - elif [ "$a" == "-isoTPMout" ]; then - ((i++)) - isoTPMout_file=${arg[i]} - ((i++)) - else - - ((i++)) - fi - -done - -support=`java -cp ${supportCalcPath} support 200 200 $pval` -fpkmGeneCommand="$fpkmGeneCommand -b $support" -fpkmIsoformCommand="$fpkmIsoformCommand -b $support" -tpmGeneCommand="$tpmGeneCommand -b $support" -tpmIsoformCommand="$tpmIsoformCommand -b $support" - -fpkmGeneCommand="$fpkmGeneCommand -dfc 2" -fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2" -tpmGeneCommand="$tpmGeneCommand -dfc 2" -tpmIsoformCommand="$tpmIsoformCommand -dfc 2" - - -#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then -# pth=`pwd` -# out_prefix=${pth}/${out_prefix} -#fi - - -echo GENE FPKM -echo moving start -date - -mkdir fpkm_G -cd fpkm_G -mv ../c*_fpkm_G . 
- - -echo isoDE start -date - - -${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt" -#awk '{for (f=1; f<=NF; f++) {if (f == NF) printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file} - -echo awk command -date - - -awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file} - -cd .. - - -echo ISOFORM FPKM - -echo moving start -date - -mkdir fpkm_I -cd fpkm_I -mv ../c*_fpkm_I . - - -echo isoDE start -date - -${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt" - -echo awk command -date -pwd -awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoFPKMout_file} -cd .. - - -echo ISOFORM TPM -echo moving start -date - -mkdir tpm_G -cd tpm_G -mv ../c*_tpm_G . - -echo isoDE start -date - -${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt" - -echo awk command -date -pwd -awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneTPMout_file} -cd .. - - -echo ISOFORM TPM -echo moving start -date - -mkdir tpm_I -cd tpm_I -mv ../c*_tpm_I . - -echo isoDE start -date - -${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt" - -echo awk command -date - -awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoTPMout_file} - -cd .. 
- - -echo final cleanup -date -rm -fr fpkm_G fpkm_I tpm_G cd tpm_I -echo done -date - - diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/FC_Filter_IsoDE_wrapper.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.sh Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,33 @@ +#!/bin/bash + +tempDir=/galaxy-prod/tmp + +inputfile=${1} +inputfile2=${2} +outputfile=${3} +inputFC=${4} + + +if [ "${inputfile2}" == "1" ] +then + echo + awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile +#exit; + +elif [ "${inputfile2}" == "2" ] +then + awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile +else + awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile +fi + +echo "done" +date + + + + + +#logx(y) = logn(y)/logn(x) + +#The NR==1 condition makes sure the file header gets printed \ No newline at end of file diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/FC_Filter_IsoDE_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.xml Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,58 @@ + + Filters IsoDE2 output based on fold change + + FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min + + + + + + + + + + + + + + + + + + + + +**What it does** + +This filter selects from the IsoDE2 output file, genes that genes that have fold change (ratio between the gene expression in the two conditions in comparison) greater +than or equal to the a certain threshold set by the user. + + +**Input** + +* 1- IsoDE2 output file. 
It can be any of the output files generated by an IsoDE2 run; for genes or isoforms/FPKM or TPM +* 2- Direction of over expression of interest to the user (genes over expressed in condition 1, over expressed in condition 2, or overexpressed in either condition) +* 3- Minimum fold change of interest +* +* + + + + +**Output** + + +The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the user. + + + + + + + + + + + + + diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/IsoEM.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/IsoEM.loc Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,12 @@ +#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 + +#NOTE: All entries in this file MUST be tab-delimited + +#Every entry has the following 5 fields: +#Reference name + + + +mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt +hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt + \ No newline at end of file diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/IsoEM.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/IsoEM.loc.sample Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,12 @@ +#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 + +#NOTE: All entries in this file MUST be tab-delimited + +#Every entry has the following 5 fields: +#Reference name + + + +mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf 
/import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt +hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt + \ No newline at end of file diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/README.txt Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,11 @@ +To install IsoEM2, IsoDE2 and the FC filter + +1) Follow Galaxy instructions for installing a tool from the Tool Shed +2) Download and install IsoEM2/IsoDE2 (https://github.com/mandricigor/isoem2) +3) Install other dependencies: +- tmap (needed for ION Torrent data) +- hisat2 (needed for Illumina data) +- prinseq +- bedtools +4) Edit isoem_wrapper.sh and IsoDE2.sh. 
Change tool paths to where they are installed on your system + diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoDE.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/isoDE.xml Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,79 @@ + + Compute gene Differential Expression based on IsoEM2 output + isoDE2.sh + -c1 + $condition1 + #for $r in $condition1replicates + ${r.c1Rep} + #end for + -c2 + $condition2 + #for $r in $condition2replicates + ${r.c2Rep} + #end for + -pval $pval + -geneFPKMout $geneFPKM + -geneTPMout $geneTPM + -isoFPKMout $isoformFPKM + -isoTPMout $isoformTPM + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Computes gene and isoform differential expression between two conditions (for example, tumor and normal) for both Fragments per Kilobase of transcript length per Million +mapped reads (FPKM) and Transcripts per Million (TPM) values. The computation is based on the bootstrapping output generated by IsoEM2. The number of bootstrap iterations for +IsoEM2 should be >= 20 (suggested 200). + +**Input** + +* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicates for either condition +* - Desired p-value for which a reliable fold change level will be reported +* + + + + +**Output** + +* Four output files containing results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have identical format with the following fields +* 1- Gene/isoform ID +* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2. +* For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared to condition 1 is +* at least 2 ^ absolute value of this field. The sign indicates the direction, +ve means over expressed in condition 2, -ve means over expressed in +* condition 1. 0 indicates that no change was detected. 
+* 3- log_2(condition 2 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping +* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping +* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping + + + + diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoDE2.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/isoDE2.sh Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,218 @@ +#!/bin/bash + +isoEMDir=/galaxy/galaxy/tools-dependencies/bin/isoem2/ +toolpath=${isoEMDir}/bin +isoDEPath=${isoEMDir}/bin +supportCalcPath=${isoEMDir}/src/calc +fpkmGeneCommand="" +fpkmIsoformCommand="" +tpmGeneCommand="" +tpmIsoformCommand="" + +numberOfBootstrapIterationsPerSample=199 + +arg=($*) +i=0 +while [ $i -lt $# ] +do + a=${arg[i]} + if [ "$a" == "-c1" ]; then + fpkmGeneCommand="$fpkmGeneCommand -c1" + fpkmIsoformCommand="$fpkmIsoformCommand -c1" + tpmGeneCommand="$tpmGeneCommand -c1" + tpmIsoformCommand="$tpmIsoformCommand -c1" + + ((i++)) + a=${arg[i]} + rep=1 + while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] + do + condition1File=$a + ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep} + + fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G" + fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I" + tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G" + tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I" + ((rep++)) + + ((i++)) + a=${arg[i]} + done + elif [ "$a" == "-c2" ]; then + fpkmGeneCommand="$fpkmGeneCommand -c2" + fpkmIsoformCommand="$fpkmIsoformCommand -c2" + tpmGeneCommand="$tpmGeneCommand -c2" + tpmIsoformCommand="$tpmIsoformCommand -c2" + + ((i++)) + a=${arg[i]} + rep=1 + while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] + do + condition1File=$a + #echo $condition1File + ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep} + + fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G" + fpkmIsoformCommand="$fpkmIsoformCommand 
c2_rep${rep}_fpkm_I" + tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G" + tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I" + ((rep++)) + + ((i++)) + a=${arg[i]} + done + elif [ "$a" == "-pval" ]; then + ((i++)) + pval=${arg[i]} + ((i++)) + + + elif [ "$a" == "-geneFPKMout" ]; then + ((i++)) + geneFPKMout_file=${arg[i]} + ((i++)) + + + elif [ "$a" == "-geneTPMout" ]; then + ((i++)) + geneTPMout_file=${arg[i]} + ((i++)) + + elif [ "$a" == "-isoFPKMout" ]; then + ((i++)) + isoFPKMout_file=${arg[i]} + ((i++)) + + elif [ "$a" == "-isoTPMout" ]; then + ((i++)) + isoTPMout_file=${arg[i]} + ((i++)) + else + + ((i++)) + fi + +done +((rep--)) +bootstrap=$(($rep*$numberOfBootstrapIterationsPerSample)) +echo boostrap iterations $bootstrap +#support=`java -cp ${supportCalcPath} support 200 200 $pval` +#support=`java -cp ${supportCalcPath} support $bootstrap $bootstrap $pval` +#Calculator assumes IsoDE does all pairs when calculation the number of ratios. It multiplies the number of bootstrap samples per condition +# changed the second parameter to make the number of ratios equal to the number of bootstrap samples (match, not all pairs) +echo calculate support based on p-value and number or replicates +support=`java -cp ${supportCalcPath} support $bootstrap 1 $pval` +fpkmGeneCommand="$fpkmGeneCommand -b $support" +fpkmIsoformCommand="$fpkmIsoformCommand -b $support" +tpmGeneCommand="$tpmGeneCommand -b $support" +tpmIsoformCommand="$tpmIsoformCommand -b $support" + +fpkmGeneCommand="$fpkmGeneCommand -dfc 2" +fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2" +tpmGeneCommand="$tpmGeneCommand -dfc 2" +tpmIsoformCommand="$tpmIsoformCommand -dfc 2" + + +#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then +# pth=`pwd` +# out_prefix=${pth}/${out_prefix} +#fi + + +echo GENE FPKM +echo moving start +date + +mkdir fpkm_G +cd fpkm_G +mv ../c*_fpkm_G . 
+ + +echo isoDE start +date + + +${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt" +#awk '{for (f=1; f<=NF; f++) {if (f == NF) printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file} + +echo awk command +date + + +awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file} + +cd .. + +# Isoform-level FPKM: isodecalls runs inside its own fpkm_I working directory +echo ISOFORM FPKM + +echo moving start +date + +mkdir fpkm_I +cd fpkm_I +mv ../c*_fpkm_I . + + +echo isoDE start +date + +${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt" + +echo awk command +date +pwd +awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoFPKMout_file} +cd .. + +# Gene-level TPM: isodecalls runs inside its own tpm_G working directory +echo GENE TPM +echo moving start +date + +mkdir tpm_G +cd tpm_G +mv ../c*_tpm_G . + +echo isoDE start +date + +${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt" + +echo awk command +date +pwd +awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneTPMout_file} +cd .. + +# Isoform-level TPM: isodecalls runs inside its own tpm_I working directory +echo ISOFORM TPM +echo moving start +date + +mkdir tpm_I +cd tpm_I +mv ../c*_tpm_I . + +echo isoDE start +date + +${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt" + +echo awk command +date + +awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoTPMout_file} + +cd .. 
+ +# Remove the four per-metric working directories (fpkm_G, fpkm_I, tpm_G, tpm_I) +echo final cleanup +date +rm -fr fpkm_G fpkm_I tpm_G tpm_I +echo done +date + + diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoem_wrapper.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/isoem_wrapper.sh Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,367 @@ +#!/bin/bash + + +echo $@ +echo pwd +pwd +toolpath=/galaxy-prod/galaxy/tools-dependencies +hisat2Path=${toolpath}/bin +tmapPath=${toolpath}/bin +prinseqPath=${toolpath}/bin +bedtoolsPath=${toolpath}/bin + + +#tmapPath=/usr/local/bin +#bedtoolsPath=/usr/bin +#hisat2Path=/usr/local/bin + +tempDir=/galaxy-prod/tmp + + +isoem2Path=${toolpath}/bin/isoem2/bin + +#exit; + +arg=($*) +i=0 +for a in ${arg[*]} +do +((i++)) + if [ "$a" == "--input1" ]; then + RNAseq_1=${arg[i]} + fi + + if [ "$a" == "--input2" ]; then + RNAseq_2=${arg[i]} + fi + + if [ "$a" == "--GTF" ]; then + GTF_file=${arg[i]} + fi + + if [ "$a" == "--TMAP_INDEX" ]; then + TMAP_INDEX_file=${arg[i]} + fi + + if [ "$a" == "--HISAT2_INDEX" ]; then + HISAT2_INDEX_file=${arg[i]} + fi + + if [ "$a" == "--Cluster" ]; then + Cluster_file=${arg[i]} + fi + + if [ "$a" == "-m" ]; then + M=${arg[i]} + fi + + if [ "$a" == "-d" ]; then + D=${arg[i]} + fi + + if [ "$a" == "--out_gene_fpkm" ]; then + out_gene_fpkm=${arg[i]} + fi + + if [ "$a" == "--out_gene_tpm" ]; then + out_gene_tpm=${arg[i]} + fi + + if [ "$a" == "--out_iso_fpkm" ]; then + out_iso_fpkm=${arg[i]} + fi + + if [ "$a" == "--out_iso_tpm" ]; then + out_iso_tpm=${arg[i]} + fi + + if [ "$a" == "--out_bootstrap" ]; then + out_bootstrap=${arg[i]} + fi + + if [ "$a" == "--RNA_type" ]; then + RNAseqType=${arg[i]} + fi + + if [ "$a" == "--fastaFile" ]; then + FastaFile=${arg[i]} + fi + + if [ "$a" == "--MinReadLength" ]; then + MinReadLengthNum=${arg[i]} + fi + +done + + + + +if [ "${RNAseqType}" == "Ion-Torrent-Proton" ] +then + echo ${TMAP_INDEX_file} + echo Align the RNAseq_sample fastq to transcriptome using TMAP + + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + 
+# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + echo file type $file_type + + if [ "$file_type" == "fastq" ]; then + + #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam + cp ${RNAseq_1} RNAseq_1.fastq + fi + + file_type=`echo $f | tail -c 4` + + if [ "$file_type" == "bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq + + +# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam + fi + + echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq + + echo filter the RNA fastq QC less than 20 and duplicates + perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_len ${MinReadLengthNum} -min_qual_mean 20 + +# rm RNA_1.fastq + + echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq + + ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam + + +elif [ "${RNAseqType}" == "Ion-Torrent-Proton-mm9" ] +then + echo ${TMAP_INDEX_file} + echo Align the RNAseq_sample fastq to transcriptome using TMAP + + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + +# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + echo file type $file_type + + if [ 
"$file_type" == "fastq" ]; then + + #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam + cp ${RNAseq_1} RNAseq_1.fastq + fi + + file_type=`echo $f | tail -c 4` + + if [ "$file_type" == "bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq + + +# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam + fi + + echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq + + echo filter the RNA fastq QC less than 20 and duplicates + perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} + +# rm RNA_1.fastq + + echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq + + ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam + + +elif [ "${RNAseqType}" == "Illumina-paired-end" ] +then + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + +# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# gunzip -c ${RNAseq_2} > RNAseq_2.fastq +# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + + if [ "$file_type" == "fastq" ]; then + + +# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + cp ${RNAseq_1} RNAseq_1.fastq + cp ${RNAseq_2} RNAseq_2.fastq + + + fi + + file_type=`echo $f | tail -c 4` + + if [ "$file_type" == 
"bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq +# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + + + echo @@@@@@ Number of raw reads not paired in Ilumina RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq + + + echo filter the RNA fastq QC less than 20 and duplicates + perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} + + echo @@@@@@ Number of cleaned reads not paired in Ilumina RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq + + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNA_QC_dup_1.fastq -2 RNA_QC_dup_2.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam + +else + f=$(basename ${RNAseq_1}) +# file_type=`echo $f | tail -c 9` + +# if [ "$file_type" == "fastq.gz" ]; then + +# echo "Unzip fastq files" + +# gunzip -c ${RNAseq_1} > RNAseq_1.fastq +# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam +# fi + + file_type=`echo $f | tail -c 6` + + if [ "$file_type" == "fastq" ]; then + cp ${RNAseq_1} RNAseq_1.fastq + + #${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + + if [ "$file_type" == "bam" ]; then + + echo "Convert BAM to fastq" + + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq +# 
${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam + fi + + echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq + + echo filter the RNA fastq QC less than 20 and duplicates + perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} + +# rm RNA_1.fastq + + echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNA_QC_dup.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam + +fi + + +echo Sorting + +LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam + + +if [ "${RNAseqType}" == "Illumina-paired-end" ] +then + echo IsoEM for RNAseq mapped to transcriptome + ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam + +else + echo IsoEM for RNAseq mapped to transcriptome + ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam +fi + +echo Join estimates files with ci files + +echo ls +#ls ./aligned_reads_sorted/ -ltr + +join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 +awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm +join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm +join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > 
iso_fpkm +join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm + + +#echo Adding output directory to bootstap archive +# +#echo ls +#ls ./aligned_reads_sorted/ -ltr +# +#cd aligned_reads_sorted +#echo ls +#ls -ltrh +#gunzip bootstrap.tar.gz +#tar rf bootstrap.tar output +#gzip bootstrap.tar +mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap} + + +#echo ls after gz +#ls -ltr +# +#cd .. +#pwd + + +#gunzip ./aligned_reads_sorted/bootstrap.tar.gz +#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output +#gzip ./aligned_reads_sorted/bootstrap.tar + +echo ls after gz +ls -ltr + +#4. Copy output files +############################################################# +mv gene_fpkm ${out_gene_fpkm} +mv gene_tpm ${out_gene_tpm} +mv iso_fpkm ${out_iso_fpkm} +mv iso_tpm ${out_iso_tpm} + +#5.Remove files +############################################################# +rm RNAseq_transcriptome.sam +rm aligned_reads_sorted.sam +rm -rf aligned_reads_sorted + +echo "done" +date + + + + diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/isoem_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/isoem_wrapper.xml Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,135 @@ + + Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data + + + + + isoem_wrapper.sh + + ## Provide outputs. + --out_gene_fpkm $out_gene_fpkm + --out_gene_tpm $out_gene_tpm + --out_iso_fpkm $out_iso_fpkm + --out_iso_tpm $out_iso_tpm + --out_bootstrap $out_bootstrap + + --MinReadLength $MinReadLength + + ## Handle reference file . 
+ #if $referenceSource.CCDSsource == "history": + --fastaFile $referenceSource.fastaFile + #else: + --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster + #end if + + ## First input file always required fastq1. + --input1 $Data.input1 + + ## Set params based on whether reads are single-end or paired. + #if $Data.RNAseqType == "Illumina-paired-end": + --input2 $Data.input2 + #else: + -m $Data.lengthMean + -d $Data.lengthSd + #end if + + ## RNA-Seq type based on sequencing platform. + --RNA_type $Data.RNAseqType > $Run 2>&1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data. + +**Input Format** + +* The tool accept the fastq, fastq.gz, bam formats. Extension must be specified at the end of the file names. +* RNA-seq data must be Ion Torrent Proton or Illumina sequncing data. + +----- + + +**Output Format** + +* Four output files containinag results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have identical format with the following fields. + + +* 1 Gene/Isoform ID +* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads) +* 3 Min FPKM/TPM +* 4 Max FPKM/TPM + +* And one compressed **Bootstrap.tar** file will be used in IsoDE2 to compute gene differential expression. + + + + + + + + diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/tool_data_table_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/tool_data_table_conf.xml Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,10 @@ + + + + + value, GTF, CCDS_INDEX, Cluster + +
+ +
+ diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem2_isode2/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/tool_data_table_conf.xml.sample Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,10 @@ + + + + + value, GTF, CCDS_INDEX, Cluster + +
+ +
+ diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem_wrapper.sh --- a/isoem_wrapper.sh Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,256 +0,0 @@ -#!/bin/bash - - -echo $@ -echo pwd -pwd -isoEMDir=/home/projects/isoem2/isoem-workingversion -tmapPath=/usr/local/bin -bedtoolsPath=/usr/local/bin -hisat2Path=/usr/local/bin -tempDir=/tmp - - -isoem2Path=${isoEMDir}/bin - -#exit; - -arg=($*) -i=0 -for a in ${arg[*]} -do -((i++)) - if [ "$a" == "--input1" ]; then - RNAseq_1=${arg[i]} - fi - - if [ "$a" == "--input2" ]; then - RNAseq_2=${arg[i]} - fi - - if [ "$a" == "--GTF" ]; then - GTF_file=${arg[i]} - fi - - if [ "$a" == "--TMAP_INDEX" ]; then - TMAP_INDEX_file=${arg[i]} - fi - - if [ "$a" == "--HISAT2_INDEX" ]; then - HISAT2_INDEX_file=${arg[i]} - fi - - if [ "$a" == "--Cluster" ]; then - Cluster_file=${arg[i]} - fi - - if [ "$a" == "-m" ]; then - M=${arg[i]} - fi - - if [ "$a" == "-d" ]; then - D=${arg[i]} - fi - - if [ "$a" == "--out_gene_fpkm" ]; then - out_gene_fpkm=${arg[i]} - fi - - if [ "$a" == "--out_gene_tpm" ]; then - out_gene_tpm=${arg[i]} - fi - - if [ "$a" == "--out_iso_fpkm" ]; then - out_iso_fpkm=${arg[i]} - fi - - if [ "$a" == "--out_iso_tpm" ]; then - out_iso_tpm=${arg[i]} - fi - - if [ "$a" == "--out_bootstrap" ]; then - out_bootstrap=${arg[i]} - fi - - if [ "$a" == "--RNA_type" ]; then - RNAseqType=${arg[i]} - fi - - if [ "$a" == "--fastaFile" ]; then - FastaFile=${arg[i]} - fi -done - - - -if [ "${RNAseqType}" == "Ion-Torrent-Proton" ] -then - echo ${TMAP_INDEX_file} - echo Align the RNAseq_sample fastq to transcriptome using TMAP - - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - - ${tmapPath}/tmap 
map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam - fi - - file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam - fi - - -elif [ "${RNAseqType}" == "Illumina-paired-end" ] -then - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# gunzip -c ${RNAseq_2} > RNAseq_2.fastq -# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - -else - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} 
--no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - -fi - - -echo Sorting - -LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam - - -if [ "${RNAseqType}" == "Illumina-paired-end" ] -then - echo IsoEM for RNAseq mapped to transcriptome - ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam - -else - echo IsoEM for RNAseq mapped to transcriptome - ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam -fi - -echo Join estimates files with ci files - -echo ls -#ls ./aligned_reads_sorted/ -ltr - -join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 -awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm -join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm -join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm -join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm - - -#echo Adding output directory to bootstap archive -# -#echo ls -#ls ./aligned_reads_sorted/ -ltr -# -#cd aligned_reads_sorted -#echo ls -#ls -ltrh -#gunzip bootstrap.tar.gz -#tar rf bootstrap.tar output -#gzip bootstrap.tar -mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap} - - -#echo ls after gz -#ls -ltr -# -#cd .. 
-#pwd - - -#gunzip ./aligned_reads_sorted/bootstrap.tar.gz -#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output -#gzip ./aligned_reads_sorted/bootstrap.tar - -echo ls after gz -ls -ltr - -#4. Copy output files -############################################################# -mv gene_fpkm ${out_gene_fpkm} -mv gene_tpm ${out_gene_tpm} -mv iso_fpkm ${out_iso_fpkm} -mv iso_tpm ${out_iso_tpm} - -#5.Remove files -############################################################# -rm RNAseq_transcriptome.sam -rm aligned_reads_sorted.sam -rm -rf aligned_reads_sorted - -echo "done" -date - - - - diff -r ce0a125b3cd1 -r 78d03bf22a1f isoem_wrapper.xml --- a/isoem_wrapper.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,161 +0,0 @@ - - Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data - - - - - isoem_wrapper.sh - - ## Provide outputs. - --out_gene_fpkm $out_gene_fpkm - --out_gene_tpm $out_gene_tpm - --out_iso_fpkm $out_iso_fpkm - --out_iso_tpm $out_iso_tpm - --out_bootstrap $out_bootstrap - - ## Handle reference file . - #if $referenceSource.CCDSsource == "history": - --fastaFile $referenceSource.fastaFile - #else: - --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster - #end if - - ## First input file always required fastq1. - --input1 $Data.input1 - - ## Set params based on whether reads are single-end or paired. - #if $Data.RNAseqType == "Illumina-paired-end": - --input2 $Data.input2 - #else: - -m $Data.lengthMean - -d $Data.lengthSd - #end if - - ## RNA-Seq type based on sequencing platform. 
- --RNA_type $Data.RNAseqType > $Run 2>&1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data. - -**Input Format** - -* The tool accept the fastq, fastq.gz, bam formats. Extension must be specified at the end of the file names. -* RNA-seq data must be Ion Torrent Proton or Illumina sequncing data. - ------ - - -**BUILT-IN REFERENCE documentation** - -**mm10_C57BL/6:** - -* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF -* TMAP_index:/import1/tmap-index/tmap3.4.1/mm10/CCDS_nucleotide.20140407.fna -* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407 -* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt - -**mm10_BALB/c:** - -* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF -* TMAP_index: /import1/tmap-index/tmap3.4.1/mm10/mm10_CCDS_nucleotide.20140407_BALBc.fna -* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407_BALBc -* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt - -**hg19** - -* GTF file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS_nucleotide.20131129.fa.GTF -* TMAP_index: /import1/tmap-index/tmap3.4.1/hg19/hg19_CCDS_nucleotide.20131129.fa -* HISAT2_index: /import1/hisat2-index/hg19/hg19_CCDS_nucleotide.20131129.fna -* Cluster file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS.20131129_transcriptID_geneName.txt - -**hg38** - -* GTF file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS_nucleotide.20150512.fna.GTF -* TMAP_index: /import1/tmap-index/tmap3.4.1/hg38/hg38_CCDS_nucleotide.20150512.fna -* HISAT2_index: /import1/hisat2-index/hg38_CCDS_downloadedRef/h19_CCDS_nucleotide.20150512.fna -* Cluster file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS.20150512_transcriptID_geneName.txt - ------ - -**Output Format** - -* 
Four output files containinag results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have identical format with the following fields. - - -* 1 Gene/Isoform ID -* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads) -* 3 Min FPKM/TPM -* 4 Max FPKM/TPM - -* And one compressed **Bootstrap.tar** file will be used in IsoDE2 to compute gene differential expression. - - - - - - - - diff -r ce0a125b3cd1 -r 78d03bf22a1f tool_data_table_conf.xml --- a/tool_data_table_conf.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ - - - - - value, GTF, CCDS_INDEX, Cluster - -
- -
- diff -r ce0a125b3cd1 -r 78d03bf22a1f tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ - - - - - value, GTF, CCDS_INDEX, Cluster - -
- -
-