Mercurial > repos > saharlcc > isoem2_isode2
changeset 10:78d03bf22a1f draft
- Add prinseq command to filter RNA-Seq data
- Fix in interpreting p-value when replicates are used
author | saharlcc |
---|---|
date | Thu, 16 Mar 2017 13:44:03 -0400 |
parents | ce0a125b3cd1 |
children | 630d5a01ef13 |
files | FC_Filter_IsoDE_wrapper.sh FC_Filter_IsoDE_wrapper.xml IsoEM.loc IsoEM.loc.sample isoDE.xml isoDE2.sh isoem2_isode2/FC_Filter_IsoDE_wrapper.sh isoem2_isode2/FC_Filter_IsoDE_wrapper.xml isoem2_isode2/IsoEM.loc isoem2_isode2/IsoEM.loc.sample isoem2_isode2/README.txt isoem2_isode2/isoDE.xml isoem2_isode2/isoDE2.sh isoem2_isode2/isoem_wrapper.sh isoem2_isode2/isoem_wrapper.xml isoem2_isode2/tool_data_table_conf.xml isoem2_isode2/tool_data_table_conf.xml.sample isoem_wrapper.sh isoem_wrapper.xml tool_data_table_conf.xml tool_data_table_conf.xml.sample |
diffstat | 21 files changed, 945 insertions(+), 839 deletions(-) [+] |
line wrap: on
line diff
--- a/FC_Filter_IsoDE_wrapper.sh Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -#!/bin/bash - -tempDir=/galaxy-prod/tmp - -inputfile=${1} -inputfile2=${2} -outputfile=${3} -inputFC=${4} - - -if [ "${inputfile2}" == "1" ] -then - echo - awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile -#exit; - -elif [ "${inputfile2}" == "2" ] -then - awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile -else - awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile -fi - -echo "done" -date - - - - - -#logx(y) = logn(y)/logn(x) - -#The NR==1 condition makes sure the file header gets printed \ No newline at end of file
--- a/FC_Filter_IsoDE_wrapper.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -<tool id="FC_Filter_IsoDE2" name="Fold Change Filter"> - <description>Filters IsoDE2 output based on fold change</description> - <command interpreter="bash"> - FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min - </command> - <inputs> - - <param name="input1" type="data" label="Select data from IsoDE output files" /> - <param name="input2" type="select" label="A list of genes/isoforms over expressed in:"> - <option value="1">Condition 1</option> - <option value="2">Condition 2</option> - <option value="3">Condition 1 or Condition 2</option> - </param> - <param name="FC_min" type="integer" value="2" label="Minimum Fold change:" /> - </inputs> - - <outputs> - <data format="tabular" name="out_file" metadata_source="input1" label="Filtered IsoDE" /> - </outputs> - - -<help> -**What it does** - -This filter selects from the IsoDE2 output file, genes that genes that have fold change (ratio between the gene expression in the two conditions in comparison) greater -than or equal to the a certain threshold set by the user. - - -**Input** - -* 1- IsoDE2 output file. It can be any of the output files generated by an IsoDE2 runs; for genes or isoforms/FPKM or TPM -* 2- Direction of over expression of interest to the user (genes over expressed in condition1, over expressed in condition 2, or overexpressed in either condition) -* 3- Minimim fold change of interest -* -* - - - - -**Output** - - -The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the used. - -</help> -</tool> - - - - - - - - - -
--- a/IsoEM.loc Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 - -#NOTE: All entries in this file MUST be tab-delimited - -#Every entry has the following 5 fields: -#Reference name <GTF> <TMAP_INDEX> <HISAT2_INDEX> <Cluster file> - - - -mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt -hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt - \ No newline at end of file
--- a/IsoEM.loc.sample Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 - -#NOTE: All entries in this file MUST be tab-delimited - -#Every entry has the following 5 fields: -#Reference name <GTF> <TMAP_INDEX> <HISAT2_INDEX> <Cluster file> - - - -mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt -hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt - \ No newline at end of file
--- a/isoDE.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -<tool id="isoDE" name="IsoDE2"> - <description>Compute gene Differential Expression based on IsoEM2 output </description> - <command interpreter="bash">isoDE2.sh - -c1 - $condition1 - #for $r in $condition1replicates - ${r.c1Rep} - #end for - -c2 - $condition2 - #for $r in $condition2replicates - ${r.c2Rep} - #end for - -pval $pval - -geneFPKMout $geneFPKM - -geneTPMout $geneTPM - -isoFPKMout $isoformFPKM - -isoTPMout $isoformTPM - </command> - - <inputs> - <param name="condition1" type="data" label="Select data for Condition 1" format="gz" help="Condition 1 isoEM2 compressed output file"/> -<!-- - <param format="toolshed.gz" name="condition1" type="data" label="Select data for Condition 1" help="Condition 1 isoEM2 compressed output file"/> ---> - <repeat name="condition1replicates" title="Replicates for Condition 1"> - <param name="c1Rep" label="Add replicate" type="data" format="gz" data_ref="condtion1" /> - </repeat> - - <param format="gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 isoEM2 compressed output file"/> -<!-- - <param format="toolshed.gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 IsoEM2 compressed output file"/> ---> - <repeat name="condition2replicates" title="Replicates for Condition 2"> - <param format="gz" name="c2Rep" label="Add replicate" type="data" data_ref="condtion2" /> - </repeat> - - - <param name="pval" label="p-value" type="float" value="0.05" help="Desired p-value to for which a reliable fold change level will be reported" /> - - </inputs> - <outputs> - <data format="tabular" name="geneFPKM" label="isoDE gene fpkm" /> - <data format="tabular" name="isoformFPKM" label="isoDE isoform fpkm" /> - <data format="tabular" name="geneTPM" label="isoDE gene tpm" /> - <data format="tabular" name="isoformTPM" label="isoDE isoform tpm" /> - </outputs> - 
-<help> -**What it does** - -Computes gene and isoform differential expression between two conditions (example tumor and normal) for both Fragment per Kilobase of transcript length per Million -bases (FPKM) and Transcripts per Million (TPM) values. The computation is based on the boostraping output generated by IsoEM2. The number of bootstrap iterations for -IsoEM2 should be >= 20 (suggested 200). - -**Input** - -* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicated for either condition -* - Desired p-value to for which a reliable fold change level will be reported -* - - - - -**Output** - -* four output files containinag results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have identical format with the following fields -* 1- Gene/isoform ID -* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2. -* For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared condition 1 is -* at least 2 ^ absoulte value of this field.The sign indicates the direction, +ve means over expressed in condition 2, -ve means underexpressed in -* condition 1. NDE indicates that no change was detected. -* 3- log_2(condition 1 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping -* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping -* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping - - -</help> -</tool>
--- a/isoDE2.sh Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -#!/bin/bash - -isoEMDir=/home/projects/isoem2/isoem-workingversion -toolpath=${isoEMDir}/bin -isoDEPath=${isoEMDir}/bin -supportCalcPath=${isoEMDir}/src/calc -fpkmGeneCommand="" -fpkmIsoformCommand="" -tpmGeneCommand="" -tpmIsoformCommand="" - - -arg=($*) -i=0 -while [ $i -lt $# ] -do - a=${arg[i]} - if [ "$a" == "-c1" ]; then - fpkmGeneCommand="$fpkmGeneCommand -c1" - fpkmIsoformCommand="$fpkmIsoformCommand -c1" - tpmGeneCommand="$tpmGeneCommand -c1" - tpmIsoformCommand="$tpmIsoformCommand -c1" - - ((i++)) - a=${arg[i]} - rep=1 - while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] - do - condition1File=$a - ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep} - - fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G" - fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I" - tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G" - tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I" - ((rep++)) - - ((i++)) - a=${arg[i]} - done - elif [ "$a" == "-c2" ]; then - fpkmGeneCommand="$fpkmGeneCommand -c2" - fpkmIsoformCommand="$fpkmIsoformCommand -c2" - tpmGeneCommand="$tpmGeneCommand -c2" - tpmIsoformCommand="$tpmIsoformCommand -c2" - - ((i++)) - a=${arg[i]} - rep=1 - while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] - do - condition1File=$a - #echo $condition1File - ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep} - - fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G" - fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I" - tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G" - tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I" - ((rep++)) - - ((i++)) - a=${arg[i]} - done - elif [ "$a" == "-pval" ]; then - ((i++)) - pval=${arg[i]} - ((i++)) - - - elif [ "$a" == "-geneFPKMout" ]; then - ((i++)) - geneFPKMout_file=${arg[i]} - ((i++)) - - - elif [ "$a" == "-geneTPMout" ]; then - ((i++)) - 
geneTPMout_file=${arg[i]} - ((i++)) - - elif [ "$a" == "-isoFPKMout" ]; then - ((i++)) - isoFPKMout_file=${arg[i]} - ((i++)) - - elif [ "$a" == "-isoTPMout" ]; then - ((i++)) - isoTPMout_file=${arg[i]} - ((i++)) - else - - ((i++)) - fi - -done - -support=`java -cp ${supportCalcPath} support 200 200 $pval` -fpkmGeneCommand="$fpkmGeneCommand -b $support" -fpkmIsoformCommand="$fpkmIsoformCommand -b $support" -tpmGeneCommand="$tpmGeneCommand -b $support" -tpmIsoformCommand="$tpmIsoformCommand -b $support" - -fpkmGeneCommand="$fpkmGeneCommand -dfc 2" -fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2" -tpmGeneCommand="$tpmGeneCommand -dfc 2" -tpmIsoformCommand="$tpmIsoformCommand -dfc 2" - - -#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then -# pth=`pwd` -# out_prefix=${pth}/${out_prefix} -#fi - - -echo GENE FPKM -echo moving start -date - -mkdir fpkm_G -cd fpkm_G -mv ../c*_fpkm_G . - - -echo isoDE start -date - - -${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt" -#awk '{for (f=1; f<=NF; f++) {if (f == NF) printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file} - -echo awk command -date - - -awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file} - -cd .. - - -echo ISOFORM FPKM - -echo moving start -date - -mkdir fpkm_I -cd fpkm_I -mv ../c*_fpkm_I . - - -echo isoDE start -date - -${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt" - -echo awk command -date -pwd -awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoFPKMout_file} -cd .. 
- - -echo ISOFORM TPM -echo moving start -date - -mkdir tpm_G -cd tpm_G -mv ../c*_tpm_G . - -echo isoDE start -date - -${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt" - -echo awk command -date -pwd -awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneTPMout_file} -cd .. - - -echo ISOFORM TPM -echo moving start -date - -mkdir tpm_I -cd tpm_I -mv ../c*_tpm_I . - -echo isoDE start -date - -${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt" - -echo awk command -date - -awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="NDE";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoTPMout_file} - -cd .. - - -echo final cleanup -date -rm -fr fpkm_G fpkm_I tpm_G cd tpm_I -echo done -date - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.sh Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,33 @@ +#!/bin/bash + +tempDir=/galaxy-prod/tmp + +inputfile=${1} +inputfile2=${2} +outputfile=${3} +inputFC=${4} + + +if [ "${inputfile2}" == "1" ] +then + echo + awk -v FC=$inputFC '{if (NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) print $0}' $inputfile > $outputfile +#exit; + +elif [ "${inputfile2}" == "2" ] +then + awk -v FC=${inputFC} '{if (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2)))) print $0}' $inputfile > $outputfile +else + awk -v FC=${inputFC} '{if ((NR == 1 || ($2 != "NDE" && $2 <= -(log(FC)/log(2)))) || (NR == 1 || ($2 != "NDE" && $2 >= (log(FC)/log(2))))) print $0}' $inputfile > $outputfile +fi + +echo "done" +date + + + + + +#logx(y) = logn(y)/logn(x) + +#The NR==1 condition makes sure the file header gets printed \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/FC_Filter_IsoDE_wrapper.xml Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,58 @@ +<tool id="FC_Filter_IsoDE2" name="Fold Change Filter"> + <description>Filters IsoDE2 output based on fold change</description> + <command interpreter="bash"> + FC_Filter_IsoDE_wrapper.sh $input1 $input2 $out_file $FC_min + </command> + <inputs> + + <param name="input1" type="data" label="Select data from IsoDE output files" /> + <param name="input2" type="select" label="A list of genes/isoforms over expressed in:"> + <option value="1">Condition 1</option> + <option value="2">Condition 2</option> + <option value="3">Condition 1 or Condition 2</option> + </param> + <param name="FC_min" type="integer" value="2" label="Minimum Fold change:" /> + </inputs> + + <outputs> + <data format="tabular" name="out_file" metadata_source="input1" /> +<!-- label="Filtered IsoDE" /> --> + + </outputs> + + +<help> +**What it does** + +This filter selects, from the IsoDE2 output file, the genes that have a fold change (the ratio between the gene expression levels in the two conditions being compared) greater +than or equal to a threshold set by the user. + + +**Input** + +* 1- IsoDE2 output file. It can be any of the output files generated by an IsoDE2 run; for genes or isoforms/FPKM or TPM +* 2- Direction of over expression of interest to the user (genes over expressed in condition 1, over expressed in condition 2, or over expressed in either condition) +* 3- Minimum fold change of interest +* +* + + + + +**Output** + + +The output is a tabular file with the same fields as the input file, containing the subset of genes satisfying the conditions specified by the user. + +</help> +</tool> + + + + + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/IsoEM.loc Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,12 @@ +#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 + +#NOTE: All entries in this file MUST be tab-delimited + +#Every entry has the following 5 fields: +#Reference name <GTF> <TMAP_INDEX> <HISAT2_INDEX> <Cluster file> + + + +mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt +hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/IsoEM.loc.sample Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,12 @@ +#This file has file paths for the GTF, cluster file and tmap and hisat2 indices for IsoEM2 + +#NOTE: All entries in this file MUST be tab-delimited + +#Every entry has the following 5 fields: +#Reference name <GTF> <TMAP_INDEX> <HISAT2_INDEX> <Cluster file> + + + +mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt +hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/README.txt Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,11 @@ +To install IsoEM2, IsoDE2 and the FC filter + +1) Follow Galaxy instructions for installing a tool from the Tool Shed +2) Download and install IsoEM2/IsoDE2 (https://github.com/mandricigor/isoem2) +3) Install other dependencies: +- tmap (needed for Ion Torrent data) +- hisat2 (needed for Illumina data) +- prinseq +- bedtools +4) Edit isoem_wrapper.sh and isoDE2.sh, changing the tool paths to where the tools are installed on your system +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/isoDE.xml Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,79 @@ +<tool id="isoDE" name="IsoDE2"> + <description>Compute gene Differential Expression based on IsoEM2 output </description> + <command interpreter="bash">isoDE2.sh + -c1 + $condition1 + #for $r in $condition1replicates + ${r.c1Rep} + #end for + -c2 + $condition2 + #for $r in $condition2replicates + ${r.c2Rep} + #end for + -pval $pval + -geneFPKMout $geneFPKM + -geneTPMout $geneTPM + -isoFPKMout $isoformFPKM + -isoTPMout $isoformTPM + </command> + + <inputs> + <param name="condition1" type="data" label="Select data for Condition 1" format="gz" help="Condition 1 isoEM2 compressed output file"/> +<!-- + <param format="toolshed.gz" name="condition1" type="data" label="Select data for Condition 1" help="Condition 1 isoEM2 compressed output file"/> +--> + <repeat name="condition1replicates" title="Replicates for Condition 1"> + <param name="c1Rep" label="Add replicate" type="data" format="gz" data_ref="condtion1" /> + </repeat> + + <param format="gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 isoEM2 compressed output file"/> +<!-- + <param format="toolshed.gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 IsoEM2 compressed output file"/> +--> + <repeat name="condition2replicates" title="Replicates for Condition 2"> + <param format="gz" name="c2Rep" label="Add replicate" type="data" data_ref="condtion2" /> + </repeat> + + + <param name="pval" label="p-value" type="float" value="0.05" help="Desired p-value to for which a reliable fold change level will be reported" /> + + </inputs> + <outputs> + <data format="tabular" name="geneFPKM" label="isoDE gene fpkm" /> + <data format="tabular" name="isoformFPKM" label="isoDE isoform fpkm" /> + <data format="tabular" name="geneTPM" label="isoDE gene tpm" /> + <data format="tabular" name="isoformTPM" label="isoDE isoform tpm" /> + 
</outputs> + +<help> +**What it does** + +Computes gene and isoform differential expression between two conditions (for example, tumor and normal) for both Fragments Per Kilobase of transcript per Million +mapped reads (FPKM) and Transcripts Per Million (TPM) values. The computation is based on the bootstrapping output generated by IsoEM2. The number of bootstrap iterations for +IsoEM2 should be >= 20 (suggested 200). + +**Input** + +* - One or more IsoEM output files (compressed tar files) for each of the two conditions. More than one file can be used if there are replicates for either condition +* - Desired p-value for which a reliable fold change level will be reported +* + + + + +**Output** + +* Four output files containing results for Gene FPKM DE, Gene TPM DE, Isoform FPKM DE, and Isoform TPM DE. The four files have identical format with the following fields +* 1- Gene/isoform ID +* 2- Reliable log_2(FC) : conservative estimate of fold change in log base 2. +* For the confidence level specified by the input p-value, fold change of gene/isoform abundance (FPKM/TPM) in condition 2 compared to condition 1 is +* at least 2 ^ (absolute value of this field). The sign indicates the direction, +ve means over expressed in condition 2, -ve means over expressed in +* condition 1. 0 indicates that no change was detected. +* 3- log_2(condition 2 FPKM (or TPM)/condition 1 FPKM(or TPM)) based on IsoEM2 run without bootstrapping +* 4- condition 1 FPKM (or TPM) based on IsoEM2 run without bootstrapping +* 5- condition 2 FPKM (or TPM) based on IsoEM2 run without bootstrapping + + +</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/isoem2_isode2/isoDE2.sh Thu Mar 16 13:44:03 2017 -0400 @@ -0,0 +1,218 @@ +#!/bin/bash + +isoEMDir=/galaxy/galaxy/tools-dependencies/bin/isoem2/ +toolpath=${isoEMDir}/bin +isoDEPath=${isoEMDir}/bin +supportCalcPath=${isoEMDir}/src/calc +fpkmGeneCommand="" +fpkmIsoformCommand="" +tpmGeneCommand="" +tpmIsoformCommand="" + +numberOfBootstrapIterationsPerSample=199 + +arg=($*) +i=0 +while [ $i -lt $# ] +do + a=${arg[i]} + if [ "$a" == "-c1" ]; then + fpkmGeneCommand="$fpkmGeneCommand -c1" + fpkmIsoformCommand="$fpkmIsoformCommand -c1" + tpmGeneCommand="$tpmGeneCommand -c1" + tpmIsoformCommand="$tpmIsoformCommand -c1" + + ((i++)) + a=${arg[i]} + rep=1 + while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] + do + condition1File=$a + ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c1_rep${rep} + + fpkmGeneCommand="$fpkmGeneCommand c1_rep${rep}_fpkm_G" + fpkmIsoformCommand="$fpkmIsoformCommand c1_rep${rep}_fpkm_I" + tpmGeneCommand="$tpmGeneCommand c1_rep${rep}_tpm_G" + tpmIsoformCommand="$tpmIsoformCommand c1_rep${rep}_tpm_I" + ((rep++)) + + ((i++)) + a=${arg[i]} + done + elif [ "$a" == "-c2" ]; then + fpkmGeneCommand="$fpkmGeneCommand -c2" + fpkmIsoformCommand="$fpkmIsoformCommand -c2" + tpmGeneCommand="$tpmGeneCommand -c2" + tpmIsoformCommand="$tpmIsoformCommand -c2" + + ((i++)) + a=${arg[i]} + rep=1 + while [[ `expr index "$a" "/"` -ne 0 && $i -lt $# ]] + do + condition1File=$a + #echo $condition1File + ${toolpath}/convertToisoDEFormatWithIsoforms $condition1File c2_rep${rep} + + fpkmGeneCommand="$fpkmGeneCommand c2_rep${rep}_fpkm_G" + fpkmIsoformCommand="$fpkmIsoformCommand c2_rep${rep}_fpkm_I" + tpmGeneCommand="$tpmGeneCommand c2_rep${rep}_tpm_G" + tpmIsoformCommand="$tpmIsoformCommand c2_rep${rep}_tpm_I" + ((rep++)) + + ((i++)) + a=${arg[i]} + done + elif [ "$a" == "-pval" ]; then + ((i++)) + pval=${arg[i]} + ((i++)) + + + elif [ "$a" == "-geneFPKMout" ]; then + ((i++)) + geneFPKMout_file=${arg[i]} + 
((i++)) + + + elif [ "$a" == "-geneTPMout" ]; then + ((i++)) + geneTPMout_file=${arg[i]} + ((i++)) + + elif [ "$a" == "-isoFPKMout" ]; then + ((i++)) + isoFPKMout_file=${arg[i]} + ((i++)) + + elif [ "$a" == "-isoTPMout" ]; then + ((i++)) + isoTPMout_file=${arg[i]} + ((i++)) + else + + ((i++)) + fi + +done +((rep--)) +bootstrap=$(($rep*$numberOfBootstrapIterationsPerSample)) +echo boostrap iterations $bootstrap +#support=`java -cp ${supportCalcPath} support 200 200 $pval` +#support=`java -cp ${supportCalcPath} support $bootstrap $bootstrap $pval` +#Calculator assumes IsoDE does all pairs when calculation the number of ratios. It multiplies the number of bootstrap samples per condition +# changed the second parameter to make the number of ratios equal to the number of bootstrap samples (match, not all pairs) +echo calculate support based on p-value and number or replicates +support=`java -cp ${supportCalcPath} support $bootstrap 1 $pval` +fpkmGeneCommand="$fpkmGeneCommand -b $support" +fpkmIsoformCommand="$fpkmIsoformCommand -b $support" +tpmGeneCommand="$tpmGeneCommand -b $support" +tpmIsoformCommand="$tpmIsoformCommand -b $support" + +fpkmGeneCommand="$fpkmGeneCommand -dfc 2" +fpkmIsoformCommand="$fpkmIsoformCommand -dfc 2" +tpmGeneCommand="$tpmGeneCommand -dfc 2" +tpmIsoformCommand="$tpmIsoformCommand -dfc 2" + + +#if [ "${out_prefix}" == "$(basename ${out_prefix} )" ]; then +# pth=`pwd` +# out_prefix=${pth}/${out_prefix} +#fi + + +echo GENE FPKM +echo moving start +date + +mkdir fpkm_G +cd fpkm_G +mv ../c*_fpkm_G . 
+ + +echo isoDE start +date + + +${isoDEPath}/isodecalls $fpkmGeneCommand -out "output.txt" +#awk '{for (f=1; f<=NF; f++) {if (f == NF) printf "%s",$f; else printf "%s\t", $f }; print ""}' Bootstrap_Merge1_DIR/output.txt | sed 1,1d > ${geneFPKMout_file} + +echo awk command +date + + +awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneFPKMout_file} + +cd .. + + +echo ISOFORM FPKM + +echo moving start +date + +mkdir fpkm_I +cd fpkm_I +mv ../c*_fpkm_I . + + +echo isoDE start +date + +${isoDEPath}/isodecalls $fpkmIsoformCommand -out "output.txt" + +echo awk command +date +pwd +awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 FPKM\tc2 FPKM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoFPKMout_file} +cd .. + + +echo ISOFORM TPM +echo moving start +date + +mkdir tpm_G +cd tpm_G +mv ../c*_tpm_G . + +echo isoDE start +date + +${isoDEPath}/isodecalls $tpmGeneCommand -out "output.txt" + +echo awk command +date +pwd +awk '{if (NR == 1) {print "Gene ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${geneTPMout_file} +cd .. + + +echo ISOFORM TPM +echo moving start +date + +mkdir tpm_I +cd tpm_I +mv ../c*_tpm_I . + +echo isoDE start +date + +${isoDEPath}/isodecalls $tpmIsoformCommand -out "output.txt" + +echo awk command +date + +awk '{if (NR == 1) {print "Isoform ID\tConfident log2 FC\t Single run log2 FC\t c1 TPM\tc2 TPM";} else {if ($6 == 0 && $7 == 0) {two="0";} else {two=$2;} print $1 "\t" two "\t" $5 "\t" $6 "\t" $7}}' Bootstrap_Merge1_DIR/output.txt > ${isoTPMout_file} + +cd .. 
+ + +echo final cleanup +date +rm -fr fpkm_G fpkm_I tpm_G cd tpm_I +echo done +date + +
#!/bin/bash
#
# isoem_wrapper.sh -- Galaxy wrapper around IsoEM2.
#
# Pipeline: stage the reads as FASTQ (copy, gunzip or BAM->FASTQ), print read
# statistics, filter the reads with prinseq-lite, align them to the
# transcriptome (TMAP for Ion Torrent, HISAT2 for Illumina), sort the SAM on
# its first column, run isoem2, join the estimate and confidence-interval
# tables into four tabular outputs and hand over the bootstrap archive.
#
# Options (any order; every flag is followed by its value):
#   --input1 FILE       reads; type is detected from the file name suffix
#                       (fastq, fastq.gz or bam)
#   --input2 FILE       mate reads (Illumina-paired-end only)
#   --RNA_type TYPE     Ion-Torrent-Proton | Ion-Torrent-Proton-mm9 |
#                       Illumina-paired-end | anything else is handled as
#                       single-end reads aligned with HISAT2
#   --GTF FILE          annotation passed to isoem2 -G
#   --Cluster FILE      cluster file passed to isoem2 -c
#   --TMAP_INDEX PATH   TMAP index (Ion Torrent types)
#   --HISAT2_INDEX PATH HISAT2 index (Illumina types)
#   -m MEAN  -d SD      fragment length mean / sd (all non paired-end types)
#   --MinReadLength N   forwarded to prinseq-lite as -min_len (optional)
#   --out_gene_fpkm, --out_gene_tpm, --out_iso_fpkm, --out_iso_tpm,
#   --out_bootstrap     destination files of the results
#   --fastaFile FILE    accepted for compatibility; nothing in this script
#                       consumes it
#
# Environment overrides (defaults are the original hard-coded locations):
#   ISOEM_TOOLPATH, ISOEM_TMPDIR, ISOEM_STATS_SCRIPT

set -euo pipefail

readonly TOOLPATH=${ISOEM_TOOLPATH:-/galaxy-prod/galaxy/tools-dependencies}
readonly HISAT2_PATH=${TOOLPATH}/bin
readonly TMAP_PATH=${TOOLPATH}/bin
readonly PRINSEQ_PATH=${TOOLPATH}/bin
readonly BEDTOOLS_PATH=${TOOLPATH}/bin
readonly ISOEM2_PATH=${TOOLPATH}/bin/isoem2/bin
readonly TEMP_DIR=${ISOEM_TMPDIR:-/galaxy-prod/tmp}
readonly STATS_SCRIPT=${ISOEM_STATS_SCRIPT:-/galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py}
readonly THREADS=8

# Option values; filled in by parse_args.
rnaseq_1=''
rnaseq_2=''
gtf_file=''
tmap_index=''
hisat2_index=''
cluster_file=''
frag_mean=''
frag_sd=''
out_gene_fpkm=''
out_gene_tpm=''
out_iso_fpkm=''
out_iso_tpm=''
out_bootstrap=''
rna_type=''
fasta_file=''
min_read_length=''

warn() { printf 'isoem_wrapper: warning: %s\n' "$*" >&2; }

die() {
  printf 'isoem_wrapper: error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Abort unless an option received a non-empty value.
# Arguments: $1 - option name (for the message), $2 - its value
#######################################
require() {
  [[ -n "$2" ]] || die "missing required option $1"
}

#######################################
# Scan the command line. Like the original loop, every token is inspected
# (recognised flags do not skip their value) and unknown tokens are ignored;
# the difference is that arguments are no longer word-split or globbed.
# Arguments: the script's "$@"
#######################################
parse_args() {
  local value
  while (( $# > 0 )); do
    value=${2-}
    case "$1" in
      --input1) rnaseq_1=$value ;;
      --input2) rnaseq_2=$value ;;
      --GTF) gtf_file=$value ;;
      --TMAP_INDEX) tmap_index=$value ;;
      --HISAT2_INDEX) hisat2_index=$value ;;
      --Cluster) cluster_file=$value ;;
      -m) frag_mean=$value ;;
      -d) frag_sd=$value ;;
      --out_gene_fpkm) out_gene_fpkm=$value ;;
      --out_gene_tpm) out_gene_tpm=$value ;;
      --out_iso_fpkm) out_iso_fpkm=$value ;;
      --out_iso_tpm) out_iso_tpm=$value ;;
      --out_bootstrap) out_bootstrap=$value ;;
      --RNA_type) rna_type=$value ;;
      --fastaFile) fasta_file=$value ;;
      --MinReadLength) min_read_length=$value ;;
      *) ;;
    esac
    shift
  done
}

#######################################
# Materialise one input as an uncompressed FASTQ in the working directory.
# The type is taken from the end of the file name, as before. The bam case
# is now reachable for every RNA type (the single-end branch used to compare
# a 5-character suffix against "bam", which could never match), and
# fastq.gz is handled instead of being silently skipped.
# Arguments: $1 - source file, $2 - FASTQ file to create
#######################################
stage_fastq() {
  local src=$1 dest=$2
  local name=${src##*/}
  case "$name" in
    *fastq.gz)
      echo "Unzip fastq files"
      gunzip -c -- "$src" > "$dest" || die "cannot decompress $src"
      ;;
    *fastq)
      echo file type fastq
      cp -- "$src" "$dest" || die "cannot copy $src"
      ;;
    *bam)
      echo "Convert BAM to fastq"
      "${BEDTOOLS_PATH}/bedtools" bamtofastq -i "$src" -fq "$dest" \
        || die "bedtools bamtofastq failed for $src"
      ;;
    *)
      die "unsupported input '$name': name must end in fastq, fastq.gz or bam"
      ;;
  esac
}

#######################################
# Print a banner and the read statistics of the given FASTQ file(s).
# The statistics are informational only, so a failing or missing helper
# script is reported but does not stop the run.
# Arguments: $1 - banner text, $2.. - FASTQ files
#######################################
report_stats() {
  local label=$1
  shift
  printf '@@@@@@ %s @@@@@@@\n' "$label"
  python "$STATS_SCRIPT" "$@" || warn "read statistics unavailable for: $*"
}

#######################################
# Filter reads with prinseq-lite (mean quality >= 20, optional minimum
# length). Good reads go to RNA_QC_dup*.fastq, rejected reads are dropped.
# NOTE: the log line mentions duplicates, but no dereplication flag is
# passed to prinseq-lite here; the message is kept unchanged.
# Arguments: prinseq input options (-fastq FILE [-fastq2 FILE])
#######################################
filter_reads() {
  local -a cmd=(
    perl "${PRINSEQ_PATH}/prinseq-lite.pl" "$@"
    -out_good RNA_QC_dup -out_bad null -min_qual_mean 20
  )
  # Only add -min_len when a value was supplied; an empty value would make
  # prinseq-lite swallow the following option instead.
  if [[ -n "$min_read_length" ]]; then
    cmd+=(-min_len "$min_read_length")
  fi
  echo filter the RNA fastq QC less than 20 and duplicates
  "${cmd[@]}" || die "prinseq-lite failed"
}

# Ion Torrent reads (both Proton types): filter, then align with TMAP.
align_ion_torrent() {
  require --TMAP_INDEX "$tmap_index"
  printf '%s\n' "$tmap_index"
  echo Align the RNAseq_sample fastq to transcriptome using TMAP

  stage_fastq "$rnaseq_1" RNAseq_1.fastq
  report_stats "Number of raw reads in Proton RNA tumor" RNAseq_1.fastq
  filter_reads -fastq RNAseq_1.fastq
  report_stats "Number of cleaned reads in Proton RNA tumor" RNA_QC_dup.fastq

  "${TMAP_PATH}/tmap" map4 -a 2 -g 3 -n "$THREADS" -f "$tmap_index" \
    -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam \
    || die "tmap failed"
}

# Illumina paired-end reads: filter both mates together, align with HISAT2.
align_illumina_paired() {
  require --HISAT2_INDEX "$hisat2_index"
  require --input2 "$rnaseq_2"

  # Each mate's type is detected on its own file name.
  stage_fastq "$rnaseq_1" RNAseq_1.fastq
  stage_fastq "$rnaseq_2" RNAseq_2.fastq
  report_stats "Number of raw reads not paired in Ilumina RNA tumor" \
    RNAseq_1.fastq RNAseq_2.fastq
  filter_reads -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq
  report_stats "Number of cleaned reads not paired in Ilumina RNA tumor" \
    RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq

  "${HISAT2_PATH}/hisat2" -x "$hisat2_index" \
    -1 RNA_QC_dup_1.fastq -2 RNA_QC_dup_2.fastq \
    --no-discordant --no-mixed --sensitive --no-unal \
    --no-spliced-alignment -p "$THREADS" > RNAseq_transcriptome.sam \
    || die "hisat2 failed"
}

# Any other RNA type: single-end reads aligned with HISAT2.
align_illumina_single() {
  require --HISAT2_INDEX "$hisat2_index"

  stage_fastq "$rnaseq_1" RNAseq_1.fastq
  # NOTE(review): these banners say "Proton" although this is the HISAT2
  # single-end path; the text is kept as-is in case logs are parsed.
  report_stats "Number of raw reads in Proton RNA tumor" RNAseq_1.fastq
  filter_reads -fastq RNAseq_1.fastq
  report_stats "Number of cleaned reads in Proton RNA tumor" RNA_QC_dup.fastq

  "${HISAT2_PATH}/hisat2" -x "$hisat2_index" -U RNA_QC_dup.fastq \
    --no-discordant --no-mixed --sensitive --no-unal \
    --no-spliced-alignment -p "$THREADS" > RNAseq_transcriptome.sam \
    || die "hisat2 failed"
}

#######################################
# Join an estimates table with its confidence-interval table on the first
# field and keep the first four columns, tab separated.
# A non-zero status from join (e.g. an ordering complaint) is reported but
# not fatal, because the original script ignored it as well.
# Arguments: $1 - estimates file, $2 - CI file, $3 - file to create
#######################################
join_with_ci() {
  local estimates=$1 intervals=$2 dest=$3
  [[ -r "$estimates" ]] || die "isoem2 output not found: $estimates"
  [[ -r "$intervals" ]] || die "isoem2 output not found: $intervals"
  if ! join "$estimates" "$intervals" \
    | awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > "$dest"; then
    warn "join reported a problem while building $dest; check its contents"
  fi
}

main() {
  printf '%s\n' "$*"
  echo pwd
  pwd

  parse_args "$@"

  # Fail before any expensive step when something essential is missing
  # (this is also what happens for a history FASTA reference, which does
  # not supply --GTF / --Cluster / index options).
  require --input1 "$rnaseq_1"
  require --GTF "$gtf_file"
  require --Cluster "$cluster_file"
  require --out_gene_fpkm "$out_gene_fpkm"
  require --out_gene_tpm "$out_gene_tpm"
  require --out_iso_fpkm "$out_iso_fpkm"
  require --out_iso_tpm "$out_iso_tpm"
  require --out_bootstrap "$out_bootstrap"
  if [[ "$rna_type" != "Illumina-paired-end" ]]; then
    require -m "$frag_mean"
    require -d "$frag_sd"
  fi

  case "$rna_type" in
    Ion-Torrent-Proton | Ion-Torrent-Proton-mm9) align_ion_torrent ;;
    Illumina-paired-end) align_illumina_paired ;;
    *) align_illumina_single ;;
  esac

  echo Sorting
  # LC_ALL (not LANG) is what guarantees byte-wise ordering when the
  # environment already sets LC_ALL or LC_COLLATE.
  LC_ALL=C sort -T "$TEMP_DIR" -k 1,1 RNAseq_transcriptome.sam \
    > aligned_reads_sorted.sam \
    || die "sorting the alignments failed"

  echo IsoEM for RNAseq mapped to transcriptome
  if [[ "$rna_type" == "Illumina-paired-end" ]]; then
    "${ISOEM2_PATH}/isoem2" -G "$gtf_file" -c "$cluster_file" -C 95 -a \
      aligned_reads_sorted.sam \
      || die "isoem2 failed"
  else
    "${ISOEM2_PATH}/isoem2" -G "$gtf_file" -c "$cluster_file" -C 95 \
      -m "$frag_mean" -d "$frag_sd" aligned_reads_sorted.sam \
      || die "isoem2 failed"
  fi

  echo Join estimates files with ci files
  local results=./aligned_reads_sorted/output
  join_with_ci "${results}/Genes/gene_fpkm_estimates" \
    "${results}/ConfidenceIntervals/gene_fpkm_ci" gene_fpkm
  join_with_ci "${results}/Genes/gene_tpm_estimates" \
    "${results}/ConfidenceIntervals/gene_tpm_ci" gene_tpm
  join_with_ci "${results}/Isoforms/iso_fpkm_estimates" \
    "${results}/ConfidenceIntervals/iso_fpkm_ci" iso_fpkm
  join_with_ci "${results}/Isoforms/iso_tpm_estimates" \
    "${results}/ConfidenceIntervals/iso_tpm_ci" iso_tpm

  mv -- ./aligned_reads_sorted/bootstrap.tar.gz "$out_bootstrap" \
    || die "bootstrap archive missing"

  echo ls after gz
  ls -ltr

  # 4. Copy output files
  mv -- gene_fpkm "$out_gene_fpkm"
  mv -- gene_tpm "$out_gene_tpm"
  mv -- iso_fpkm "$out_iso_fpkm"
  mv -- iso_tpm "$out_iso_tpm"

  # 5. Remove files
  rm -f -- RNAseq_transcriptome.sam aligned_reads_sorted.sam
  rm -rf -- aligned_reads_sorted

  echo "done"
  date
}

main "$@"
<!--
  Galaxy tool definition for IsoEM2.
  Builds the isoem_wrapper.sh command line from the selected reference,
  sequencing platform and input files; stdout and stderr of the wrapper are
  captured in the "Run" log output.
-->
<tool id="isoem" name="IsoEM2" version="1.0.0">
  <description> Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data</description>
  <requirements>

  </requirements>
  <command interpreter="bash">
    isoem_wrapper.sh

    ## Provide outputs.
    --out_gene_fpkm $out_gene_fpkm
    --out_gene_tpm $out_gene_tpm
    --out_iso_fpkm $out_iso_fpkm
    --out_iso_tpm $out_iso_tpm
    --out_bootstrap $out_bootstrap

    --MinReadLength $MinReadLength

    ## Handle reference file .
    #if $referenceSource.CCDSsource == "history":
      --fastaFile $referenceSource.fastaFile
    #else:
      --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster
    #end if

    ## First input file always required fastq1.
    --input1 $Data.input1

    ## Set params based on whether reads are single-end or paired.
    #if $Data.RNAseqType == "Illumina-paired-end":
      --input2 $Data.input2
    #else:
      -m $Data.lengthMean
      -d $Data.lengthSd
    #end if

    ## RNA-Seq type based on sequencing platform.
    --RNA_type $Data.RNAseqType > $Run 2>&1
  </command>
  <inputs>
    <conditional name="referenceSource">
      <param name="CCDSsource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" help="Built-ins were indexed using default options">
        <option value="indexed">Use a built-in reference</option>
        <option value="history">Use reference from the history</option>
      </param>
      <when value="indexed">
        <!-- Entries come from the "IsoEM" data table; the command reads its
             GTF, TMAP_INDEX, HISAT2_INDEX and Cluster fields. -->
        <param name="index" type="select" label="Select a reference dataset" help="If your reference of interest is not listed, contact the Galaxy team">
          <options from_data_table="IsoEM" />
        </param>
      </when>
      <when value="history">
        <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select CCDS fasta file from your history" />
      </when> <!-- history -->
    </conditional> <!-- referenceSource -->
    <conditional name="Data">
<!--
      <param name="sPaired" type="select" label="Is this library Single-end or Paired-end?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
-->
      <param name="RNAseqType" type="select" label="Select RNA-seq type">
        <option value="Ion-Torrent-Proton">Ion Torrent single-end</option>
        <option value="Illumina-paired-end">Illumina paired-end</option>
        <option value="Illumina-single-end">Illumina single-end</option>
      </param> <!-- RNAseqType -->
      <when value="Illumina-paired-end">
        <param name="input1" type="data" label="RNA-Seq file1, fastq or bam format" />
        <param name="input2" type="data" label="RNA-Seq file2, fastq or bam format" />
      </when>
      <when value="Ion-Torrent-Proton">
        <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
        <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
        <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" />
      </when>
      <when value="Illumina-single-end">
        <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" />
        <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" />
        <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" />
      </when>
    </conditional> <!-- Data -->

    <!-- Passed to the wrapper as a numeric minimum read length, so it is
         validated as an integer instead of free text. -->
    <param name="MinReadLength" label="Min. read length" type="integer" value="50" />

<!--
    <param name="RNAseqType" type="select" label="Select RNA-seq type">
      <option value="Ion-Torrent-Proton">Ion Torrent Proton</option>
      <option value="Illumina-paired-end">Illumina paired-end</option>
      <option value="Illumina-single-end">Illumina single-end</option>
    </param>
-->
  </inputs>
  <outputs>
    <data name="out_gene_fpkm" format="tabular" label="Gene_fpkm"/>
    <data name="out_gene_tpm" format="tabular" label="Gene_tpm"/>
    <data name="out_iso_fpkm" format="tabular" label="Iso_fpkm"/>
    <data name="out_iso_tpm" format="tabular" label="Iso_tpm"/>
    <data name="out_bootstrap" format="toolshed.gz" label="Bootstrap.tar.gz"/>
    <data name="Run" format="log" label="isoem_wrapper: The log file" />
  </outputs>
<help>
**What it does**

* IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data.

**Input Format**

* The tool accepts the fastq, fastq.gz and bam formats. The extension must be specified at the end of the file names.
* RNA-seq data must be Ion Torrent Proton or Illumina sequencing data.

-----


**Output Format**

* Four output files containing results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have identical format with the following fields.


* 1 Gene/Isoform ID
* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads)
* 3 Min FPKM/TPM
* 4 Max FPKM/TPM

* And one compressed **Bootstrap.tar** file, which is used by IsoDE2 to compute gene differential expression.
</help>


</tool>
<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc -->
<tables>
    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
    <!--
      isoem_wrapper.xml reads the fields GTF, TMAP_INDEX, HISAT2_INDEX and
      Cluster of the selected entry, so the table must declare columns with
      exactly those names (the previous "CCDS_INDEX" column provided neither
      of the two index fields).
      NOTE(review): columns map positionally onto the tab-separated fields of
      IsoEM.loc; confirm that the .loc file lists the TMAP index before the
      HISAT2 index, otherwise swap the two names here.
    -->
    <table name="IsoEM" comment_char="#">
        <columns>value, GTF, TMAP_INDEX, HISAT2_INDEX, Cluster</columns>
        <file path="tool-data/IsoEM.loc" />
    </table>

</tables>
<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc -->
<tables>
    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
    <!--
      Sample data table for the IsoEM2 tool. isoem_wrapper.xml reads the
      fields GTF, TMAP_INDEX, HISAT2_INDEX and Cluster of the selected entry,
      so the table must declare columns with exactly those names (the
      previous "CCDS_INDEX" column provided neither of the two index fields).
      NOTE(review): columns map positionally onto the tab-separated fields of
      IsoEM.loc; confirm that the .loc file lists the TMAP index before the
      HISAT2 index, otherwise swap the two names here.
    -->
    <table name="IsoEM" comment_char="#">
        <columns>value, GTF, TMAP_INDEX, HISAT2_INDEX, Cluster</columns>
        <file path="tool-data/IsoEM.loc" />
    </table>

</tables>
--- a/isoem_wrapper.sh Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,256 +0,0 @@ -#!/bin/bash - - -echo $@ -echo pwd -pwd -isoEMDir=/home/projects/isoem2/isoem-workingversion -tmapPath=/usr/local/bin -bedtoolsPath=/usr/local/bin -hisat2Path=/usr/local/bin -tempDir=/tmp - - -isoem2Path=${isoEMDir}/bin - -#exit; - -arg=($*) -i=0 -for a in ${arg[*]} -do -((i++)) - if [ "$a" == "--input1" ]; then - RNAseq_1=${arg[i]} - fi - - if [ "$a" == "--input2" ]; then - RNAseq_2=${arg[i]} - fi - - if [ "$a" == "--GTF" ]; then - GTF_file=${arg[i]} - fi - - if [ "$a" == "--TMAP_INDEX" ]; then - TMAP_INDEX_file=${arg[i]} - fi - - if [ "$a" == "--HISAT2_INDEX" ]; then - HISAT2_INDEX_file=${arg[i]} - fi - - if [ "$a" == "--Cluster" ]; then - Cluster_file=${arg[i]} - fi - - if [ "$a" == "-m" ]; then - M=${arg[i]} - fi - - if [ "$a" == "-d" ]; then - D=${arg[i]} - fi - - if [ "$a" == "--out_gene_fpkm" ]; then - out_gene_fpkm=${arg[i]} - fi - - if [ "$a" == "--out_gene_tpm" ]; then - out_gene_tpm=${arg[i]} - fi - - if [ "$a" == "--out_iso_fpkm" ]; then - out_iso_fpkm=${arg[i]} - fi - - if [ "$a" == "--out_iso_tpm" ]; then - out_iso_tpm=${arg[i]} - fi - - if [ "$a" == "--out_bootstrap" ]; then - out_bootstrap=${arg[i]} - fi - - if [ "$a" == "--RNA_type" ]; then - RNAseqType=${arg[i]} - fi - - if [ "$a" == "--fastaFile" ]; then - FastaFile=${arg[i]} - fi -done - - - -if [ "${RNAseqType}" == "Ion-Torrent-Proton" ] -then - echo ${TMAP_INDEX_file} - echo Align the RNAseq_sample fastq to transcriptome using TMAP - - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - - ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} 
-s RNAseq_transcriptome.sam - fi - - file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam - fi - - -elif [ "${RNAseqType}" == "Illumina-paired-end" ] -then - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# gunzip -c ${RNAseq_2} > RNAseq_2.fastq -# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - -else - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > 
RNAseq_transcriptome.sam - fi - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - -fi - - -echo Sorting - -LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam - - -if [ "${RNAseqType}" == "Illumina-paired-end" ] -then - echo IsoEM for RNAseq mapped to transcriptome - ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam - -else - echo IsoEM for RNAseq mapped to transcriptome - ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam -fi - -echo Join estimates files with ci files - -echo ls -#ls ./aligned_reads_sorted/ -ltr - -join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 -awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm -join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm -join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm -join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm - - -#echo Adding output directory to bootstap archive -# -#echo ls -#ls ./aligned_reads_sorted/ -ltr -# -#cd aligned_reads_sorted -#echo ls -#ls -ltrh -#gunzip bootstrap.tar.gz -#tar rf bootstrap.tar output -#gzip bootstrap.tar -mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap} - - -#echo ls after gz -#ls -ltr -# -#cd .. 
-#pwd - - -#gunzip ./aligned_reads_sorted/bootstrap.tar.gz -#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output -#gzip ./aligned_reads_sorted/bootstrap.tar - -echo ls after gz -ls -ltr - -#4. Copy output files -############################################################# -mv gene_fpkm ${out_gene_fpkm} -mv gene_tpm ${out_gene_tpm} -mv iso_fpkm ${out_iso_fpkm} -mv iso_tpm ${out_iso_tpm} - -#5.Remove files -############################################################# -rm RNAseq_transcriptome.sam -rm aligned_reads_sorted.sam -rm -rf aligned_reads_sorted - -echo "done" -date - - - -
--- a/isoem_wrapper.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,161 +0,0 @@ -<tool id="isoem" name="IsoEM2" version="1.0.0"> - <description> Infers isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data</description> - <requirements> - - </requirements> - <command interpreter="bash"> - isoem_wrapper.sh - - ## Provide outputs. - --out_gene_fpkm $out_gene_fpkm - --out_gene_tpm $out_gene_tpm - --out_iso_fpkm $out_iso_fpkm - --out_iso_tpm $out_iso_tpm - --out_bootstrap $out_bootstrap - - ## Handle reference file . - #if $referenceSource.CCDSsource == "history": - --fastaFile $referenceSource.fastaFile - #else: - --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster - #end if - - ## First input file always required fastq1. - --input1 $Data.input1 - - ## Set params based on whether reads are single-end or paired. - #if $Data.RNAseqType == "Illumina-paired-end": - --input2 $Data.input2 - #else: - -m $Data.lengthMean - -d $Data.lengthSd - #end if - - ## RNA-Seq type based on sequencing platform. - --RNA_type $Data.RNAseqType > $Run 2>&1 - - - - </command> - <inputs> - <conditional name="referenceSource"> - <param name="CCDSsource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" 
help="Built-ins were indexed using default options"> - <option value="indexed">Use a built-in reference</option> - <option value="history">Use reference from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference dataset" help="If your reference of interest is not listed, contact the Galaxy team"> - <options from_data_table="IsoEM" /> - </param> - </when> - <when value="history"> - <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select CCDS fasta file from your history" /> - </when> <!-- history --> - </conditional> <!-- referenceSource --> - <conditional name="Data"> -<!-- - <param name="sPaired" type="select" label="Is this library Single-end or Paired-end?"> - <option value="single">Single-end</option> - <option value="paired">Paired-end</option> - </param> ---> - <param name="RNAseqType" type="select" label="Select RNA-seq type"> - <option value="Ion-Torrent-Proton">Ion Torrent single-end</option> - <option value="Illumina-paired-end">Illumina paired-end</option> - <option value="Illumina-single-end">Illumina single-end</option> - </param> <!-- RNAseqType --> - <when value="Illumina-paired-end"> - <param name="input1" type="data" label="RNA-Seq file1, fastq or bam format" /> - <param name="input2" type="data" label="RNA-Seq file2, fastq or bam format" /> - </when> - <when value="Ion-Torrent-Proton"> - <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" /> - <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" /> - <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> - </when> - <when value="Illumina-single-end"> - <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" /> - <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" /> - <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> - </when> - 
</conditional> <!-- Data --> -<!-- - <param name="RNAseqType" type="select" label="Select RNA-seq type"> - <option value="Ion-Torrent-Proton">Ion Torrent Proton</option> - <option value="Illumina-paired-end">Illumina paired-end</option> - <option value="Illumina-single-end">Illumina single-end</option> - </param> ---> - </inputs> - <outputs> - <data name="out_gene_fpkm" format="tabular" label="Gene_fpkm"/> - <data name="out_gene_tpm" format="tabular" label="Gene_tpm"/> - <data name="out_iso_fpkm" format="tabular" label="Iso_fpkm"/> - <data name="out_iso_tpm" format="tabular" label="Iso_tpm"/> - <data name="out_bootstrap" format="toolshed.gz" label="Bootstrap.tar.gz"/> - <data name="Run" format="log" label="isoem_wrapper: The log file" /> - </outputs> -<help> -**What it does** - -* The IsoEM can be used to infer isoform and gene expression levels from high-throughput transcriptome sequencing (RNA-Seq) data. - -**Input Format** - -* The tool accept the fastq, fastq.gz, bam formats. Extension must be specified at the end of the file names. -* RNA-seq data must be Ion Torrent Proton or Illumina sequncing data. 
- ------ - - -**BUILT-IN REFERENCE documentation** - -**mm10_C57BL/6:** - -* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF -* TMAP_index:/import1/tmap-index/tmap3.4.1/mm10/CCDS_nucleotide.20140407.fna -* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407 -* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt - -**mm10_BALB/c:** - -* GTF file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna.GTF -* TMAP_index: /import1/tmap-index/tmap3.4.1/mm10/mm10_CCDS_nucleotide.20140407_BALBc.fna -* HISAT2_index: /import1/hisat2-index/mm10_CCDS/mm10_CCDS_nucleotide.20140407_BALBc -* Cluster file: /import1/CCDS/Mm38.1/CCDS_nucleotide.20140407.fna_transcriptID_geneName.txt - -**hg19** - -* GTF file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS_nucleotide.20131129.fa.GTF -* TMAP_index: /import1/tmap-index/tmap3.4.1/hg19/hg19_CCDS_nucleotide.20131129.fa -* HISAT2_index: /import1/hisat2-index/hg19/hg19_CCDS_nucleotide.20131129.fna -* Cluster file: /import1/CCDS/HsGRCh37.1/HsGRCh37.1_CCDS.20131129_transcriptID_geneName.txt - -**hg38** - -* GTF file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS_nucleotide.20150512.fna.GTF -* TMAP_index: /import1/tmap-index/tmap3.4.1/hg38/hg38_CCDS_nucleotide.20150512.fna -* HISAT2_index: /import1/hisat2-index/hg38_CCDS_downloadedRef/h19_CCDS_nucleotide.20150512.fna -* Cluster file: /import1/CCDS/GRCh38.p2/GRCh38.p2_CCDS.20150512_transcriptID_geneName.txt - ------ - -**Output Format** - -* Four output files containinag results for **Gene FPKM**, **Gene TPM**, **Isoform FPKM**, and **Isoform TPM**. The four files have identical format with the following fields. - - -* 1 Gene/Isoform ID -* 2 Gene/Isoform FPKM (Fragments Per Kilobase per Million reads) or TPM (Transcripts per Million reads) -* 3 Min FPKM/TPM -* 4 Max FPKM/TPM - -* And one compressed **Bootstrap.tar** file will be used in IsoDE2 to compute gene differential expression. -</help> - - -</tool> - - - -
--- a/tool_data_table_conf.xml Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc--> -<tables> - <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq --> - <table name="IsoEM" comment_char="#"> - <columns>value, GTF, CCDS_INDEX, Cluster </columns> - <file path="tool-data/IsoEM.loc" /> - </table> - -</tables> -
--- a/tool_data_table_conf.xml.sample Mon Sep 19 22:10:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc--> -<tables> - <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq --> - <table name="IsoEM" comment_char="#"> - <columns>value, GTF, CCDS_INDEX, Cluster </columns> - <file path="tool-data/IsoEM.loc" /> - </table> - -</tables> -