changeset 7:1ea6445491d4 draft

Update to deFuse version 0.6.0
author Jim Johnson <jj@umn.edu>
date Mon, 07 Jan 2013 14:52:26 -0600 (2013-01-07)
parents 6e30713cefb0
children 57841f58676f
files README defuse.xml tool_dependencies.xml
diffstat 3 files changed, 110 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/README	Mon Jan 07 14:49:50 2013 -0600
+++ b/README	Mon Jan 07 14:52:26 2013 -0600
@@ -1,18 +1,18 @@
-The DeFuse galaxy tool is based on DeFuse_Version_0.5.0
+The DeFuse galaxy tool is based on DeFuse_Version_0.6.0
 http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
 
 DeFuse is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
 
 
 Manual:
-http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
+http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.0
 
 The included tool_dependencies.xml will download and install the defuse code.  
 It will set the environment variable: "DEFUSE_PATH" to the location of the defuse install.  
 The tool_dependencies.xml also has the download for bowtie.
 
 
-The defuse.pl command relies on a configuration file to specifiy options, the location of reference data, and other applications that it depends upon: bowtie, bowtie-build, samtools, blat, fatotwobit, R, and Rscript.
+The defuse.pl command relies on a configuration file to specify options, the location of reference data, and other applications that it depends upon: bowtie, bowtie-build, samtools, gmap, blat, fatotwobit, R, and Rscript.
 
 The DeFuse galaxy tool can either construct the config.txt file that is mentioned in the defuse manual, or select an existing config.txt file in the users history.   
 When constructing the config.txt file, the DeFuse tool uses the values selected in: tool-data/defuse.loc    
@@ -27,25 +27,31 @@
 
 Generate Reference Datasets as described in the Manual: 
 
-The manual has detailed instructions on how to set up reference datasets for Human hg19 and hg18. 
-We were able to follow the same basic procedures to set up a reference for Mouse mm9.
+Reference Dataset
+The reference dataset setup process has been simplified as of deFuse 0.6.0, and deFuse now automatically downloads all required files.
+The create_reference_dataset.pl script will download the genome and other source files, and build any derivative files including bowtie indices, gmap indices, and 2bit files. Run the following command. Expect this step to take at least 12 hours.
+create_reference_dataset.pl -c config.txt
 
 These datasets should be referenced in the tool-data/defuse.loc file. 
 
 
-External Tools
+External Tools  ( http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.0 )
 deFuse relies on other publically available tools as part of its pipeline. Some of these tools are not included with the deFuse download. Obtain these tools as detailed below.
-Download bowtie: 
+Download samtools
+The latest version of samtools can be downloaded from sourceforge: https://sourceforge.net/projects/samtools/files/samtools.
+Set the samtools_bin entry in config.txt to the fully qualified path of the samtools binary.
+Download bowtie
 The latest version of bowtie can be downloaded from sourceforge: http://sourceforge.net/projects/bowtie-bio/files/bowtie/. deFuse has been tested on version 0.12.5.
 Set the bowtie_bin and bowtie_build_bin entries in config.txt to the fully qualified paths of the bowtie and bowtie-build binaries.
 Download blat and faToTwoBit
 The latest blat tool suite can be downloaded from the ucsc website: http://hgdownload.cse.ucsc.edu/admin/exe/. Download blat and faToTwoBit and set the blat_bin and fatotwobit_bin entries in config.txt to the fully qualified paths of the blat and faToTwoBit binaries.
+Download GMAP
+The latest version of GMAP can be downloaded from http://research-pub.gene.com/gmap/. Build with a default configuration. Do not worry about the `--with-gmapdb` build flag; deFuse will request a specific directory for the database anyway.
 Download R
 The latest version of R can be downloaded from the R project website: http://www.r-project.org/. Install R and then locate the R and Rscript executables, and set the r_bin and rscript_bin entries in config.txt to the path of those executables.
-Install the kernlab package. Run R, then at the prompt type install.packages("kernlab")
-Creating required derivative files
-Once the required files and tools have been downloaded, the create_reference_dataset.pl script will build any derivative files including bowtie indices and 2bit files. Run the following command. Expect this step to take at least 12 hours.
+Install the ada package. Run R, then at the prompt type install.packages("ada")
+Reference Dataset
+The reference dataset setup process has been simplified as of deFuse 0.6.0, and deFuse now automatically downloads all required files.
+The create_reference_dataset.pl script will download the genome and other source files, and build any derivative files including bowtie indices, gmap indices, and 2bit files. Run the following command. Expect this step to take at least 12 hours.
 create_reference_dataset.pl -c config.txt
 
-
-
--- a/defuse.xml	Mon Jan 07 14:49:50 2013 -0600
+++ b/defuse.xml	Mon Jan 07 14:52:26 2013 -0600
@@ -1,8 +1,10 @@
-<tool id="defuse" name="DeFuse" version="1.5">
+<tool id="defuse" name="DeFuse" version="1.6">
  <description>identify fusion transcripts</description>
  <requirements>
-  <requirement type="package" version="0.5.0">defuse</requirement>
+  <requirement type="package" version="0.6.0">defuse</requirement>
+  <requirement type="package" version="0.1.18">samtools</requirement>
   <requirement type="package" version="0.12.7">bowtie</requirement>
+  <requirement type="package" version="2012-07-20">gmap</requirement>
   <requirement type="package" version="34x10">blat</requirement>
   <requirement type="package" version="34x10">fatotwobit</requirement>
  </requirements>
--- a/tool_dependencies.xml	Mon Jan 07 14:49:50 2013 -0600
+++ b/tool_dependencies.xml	Mon Jan 07 14:52:26 2013 -0600
@@ -1,9 +1,9 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="defuse" version="0.5.0">
+    <package name="defuse" version="0.6.0">
         <install version="1.0">
             <actions>
-                <action type="download_by_url">http://sourceforge.net/projects/defuse/files/defuse/0.5/defuse-0.5.0.tar.gz</action>
+                <action type="download_by_url">http://sourceforge.net/projects/defuse/files/defuse/0.6/defuse-0.6.0.tar.gz</action>
                 <action type="shell_command">cd tools &amp;&amp; make</action>
                 <action type="move_directory_files">
                     <source_directory>.</source_directory>
@@ -15,9 +15,36 @@
             </actions>
         </install>
         <readme>
+deFuse code
+To build the deFuse toolset you must have the Boost C++ development libraries installed. If they are not installed on your system you can download them from the Boost website. A full install of Boost is not required. The easiest thing to do is to download the latest Boost source tar.gz, extract it, then add the extracted path to the CPLUS_INCLUDE_PATH environment variable (in bash, `export CPLUS_INCLUDE_PATH=/boost/directory/:$CPLUS_INCLUDE_PATH`).
         </readme>
     </package>
 
+    <package name="samtools" version="0.1.18"> <!-- Downloads the samtools 0.1.18 source tarball, builds it with make, and installs the samtools and maq2sam-long binaries into $INSTALL_DIR/bin, which is prepended to PATH. -->
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2</action>
+                <action type="shell_command">sed -i.bak -e 's/-lcurses/-lncurses/g' Makefile</action>
+                <action type="shell_command">make</action>
+                <action type="move_file">
+                    <source>samtools</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>misc/maq2sam-long</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install> <!-- Note on the sed step above: it rewrites every -lcurses in the Makefile to -lncurses before make runs, keeping the original file as Makefile.bak. -->
+        <readme>
+Compiling SAMtools requires the ncurses and zlib development libraries.
+        </readme>
+    </package>
+
+
     <package name="bowtie" version="0.12.7">
         <install version="1.0">
             <actions>
@@ -45,6 +72,65 @@
         </readme>
     </package>
 
+    <package name="gmap" version="2012-07-20"> <!-- Downloads the gmap-gsnap 2012-07-20.v2 source, runs ./configure and make, then installs the listed src/ binaries and the contents of util/ into $INSTALL_DIR/bin, which is prepended to PATH. -->
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url" target_filename="gmap-2012-07-20.tar.gz">http://research-pub.gene.com/gmap/src/gmap-gsnap-2012-07-20.v2.tar.gz</action>
+                <action type="shell_command">./configure</action>
+                <action type="shell_command">make</action>
+                <action type="move_file">
+                    <source>src/gmap</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/gmapindex</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/gsnap</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/uniqscan</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/iit_store</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/iit_get</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/atoiindex</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/snpindex</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/cmetindex</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>src/get-genome</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_directory_files">
+                    <source_directory>util</source_directory>
+                    <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        </readme>
+    </package>
+
     <package name="blat" version="34x10">
         <install version="1.0">
             <actions>