changeset 4:679a5c7b1294 draft

deFuse version 0.5.0 - Use tool_dependencies.xml
author Jim Johnson <jj@umn.edu>
date Fri, 04 Jan 2013 13:29:03 -0600
parents c90022a13c7c
children 3bd1087db05e
files README defuse-0.4.3.tar.gz defuse.xml tool-data/defuse.loc.sample tool_dependencies.xml
diffstat 5 files changed, 75 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/README	Fri Jan 06 16:06:17 2012 -0600
+++ b/README	Fri Jan 04 13:29:03 2013 -0600
@@ -1,29 +1,35 @@
-The DeFuse galaxy tool is based on DeFuse_Version_0.4.3
-  http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+The DeFuse galaxy tool is based on DeFuse_Version_0.5.0
+http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
 
 DeFuse is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
 
 
 Manual:
-  http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.3
+http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
+
+The included tool_dependencies.xml will download and install the defuse code.  
+It will set the environment variable: "DEFUSE_PATH" to the location of the defuse install.  
 
 
-The included defuse source code is from:   http://sourceforge.net/projects/defuse/files/defuse/0.4/defuse-0.4.3.tar.gz/download 
-(to install uncompress the defuse source and navigate to the tools dir and type make)
-tar zxf defuse-0.4.3.tar.gz
-cd defuse-0.4.3/tool 
-make
-cd ../..
+The defuse.pl command relies on a configuration file to specify options, the location of reference data, and other applications that it depends upon: bowtie, bowtie-build, samtools, blat, faToTwoBit, R, and Rscript.
+
+The DeFuse galaxy tool can either construct the config.txt file that is mentioned in the defuse manual, or select an existing config.txt file in the user's history.   
+When constructing the config.txt file, the DeFuse tool uses the values selected in: tool-data/defuse.loc    
+The dictionary field in the tool-data/defuse.loc can be used to set fields in the config.txt file, including the site specific location of reference data and the paths to the other application binaries.  
+The "Defuse parameter settings" are used to alter options in the config.txt file.
+
+The DeFuse galaxy tool also generates a bash script to run defuse.  
+That script will attempt to edit the config.txt file to specify any unset paths to applications that defuse relies upon:
+bowtie, bowtie-build, samtools, blat, faToTwoBit, R, and Rscript
+The script uses the shell "which" command to discover the application path, so the required applications should be in the PATH environment variable.
+
 
 Generate Reference Datasets as described in the Manual: 
 
-  The manual has detailed instructions on how to set up reference datasets for Human hg19 and hg18.  
-  We were able to follow the same basic procedures to set up a reference for Mouse mm9.
+The manual has detailed instructions on how to set up reference datasets for Human hg19 and hg18. 
+We were able to follow the same basic procedures to set up a reference for Mouse mm9.
 
-  These datasets should be referenced in the tool-data/defuse.loc file.   
+These datasets should be referenced in the tool-data/defuse.loc file. 
 
 
-The defuse.xml galaxy tool wrapper will generate a defuse config.txt using values from tool-data/defuse.loc
-and call defuse-0.4.3/scripts/defuse.pl
 
-
Binary file defuse-0.4.3.tar.gz has changed
--- a/defuse.xml	Fri Jan 06 16:06:17 2012 -0600
+++ b/defuse.xml	Fri Jan 04 13:29:03 2013 -0600
@@ -1,7 +1,10 @@
-<tool id="defuse" name="DeFuse" version="1.2">
+<tool id="defuse" name="DeFuse" version="1.5">
  <description>identify fusion transcripts</description>
  <requirements>
-  <requirement type="binary"></requirement>
+  <requirement type="package" version="0.5.0">defuse</requirement>
+  <requirement type="package">bowtie</requirement>
+  <requirement type="package">blat</requirement>
+  <requirement type="package">fatotwobit</requirement>
  </requirements>
   <command interpreter="command"> /bin/bash $shscript </command>
  <inputs>
@@ -69,7 +72,7 @@
       </when>  <!-- history -->
   </conditional>  <!-- refGenomeSource -->
   <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files"/>
-  <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
+  <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
  </inputs>
  <configfiles>
   <configfile name="defuse_config">
@@ -91,15 +94,7 @@
 #try
 $ref_dict['source_directory']
 #except
-#try
-## Try to find the defuse source dir in the galaxy tool path
-#import Cheetah.FileUtils
-#set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
-#set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
-$defuse.replace('/scripts/defuse.pl','')
-#except
-${__root_dir__}/tools/defuse/defuse
-#end try
+__DEFUSE_PATH__
 #end try
 
 # Directory where you want your dataset
@@ -159,37 +154,37 @@
 #try
 $ref_dict['bowtie_bin']
 #except
-/soft/bowtie/0.12.7/bowtie
+__BOWTIE_BIN__
 #end try
 bowtie_build_bin = #slurp
 #try
 $ref_dict['bowtie_build_bin']
 #except
-/soft/bowtie/0.12.7/bowtie-build
+__BOWTIE_BUILD_BIN__
 #end try
 blat_bin = #slurp
 #try
 $ref_dict['blat_bin']
 #except
-/soft/blat/34/bin/blat
+__BLAT_BIN__
 #end try
 fatotwobit_bin = #slurp
 #try
 $ref_dict['fatotwobit_bin']
 #except
-/soft/blat/34/bin/faToTwoBit
+__FATOTWOBIT_BIN__
 #end try
 r_bin = #slurp
 #try
 $ref_dict['r_bin']
 #except
-/project/sdml-sles11-weblocal/R-2.12.1/bin/R
+__R_BIN__
 #end try
 rscript_bin = #slurp
 #try
 $ref_dict['rscript_bin']
 #except
-/project/sdml-sles11-weblocal/R-2.12.1/bin/Rscript
+__RSCRIPT_BIN__
 #end try
 
 #raw
@@ -499,14 +494,12 @@
 #!/bin/bash
 ## define some things for cheetah proccessing
 #set $ds = chr(36)
+#set $amp = chr(38)
 #set $gt = chr(62)
 #set $lt = chr(60)
 #set $echo_cmd = 'echo'
 ## Find the defuse.pl in the galaxy tool path
 #import Cheetah.FileUtils
-#set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
-#set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
-#set $get_reads = $Cheetah.FileUtils.findFiles($toolpath,['get_reads.pl'],[],['tools','external','include','em','data'])[0]
 ## declare a bash function for converting a results tsv into html with links to the get_reads output files
 results2html() {
   rlts=${ds}1
@@ -531,10 +524,21 @@
     for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`;
       do fn=cluster_${ds}{i}_reads.txt;
       pn=${ds}_EFP/${ds}fn;
-      perl $get_reads -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn;
+      perl \${DEFUSE_PATH}/scripts/get_reads.pl -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn;
     done
   fi
 }
+## substitute pathnames into config file
+if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
+if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
+if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
+if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
+if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
+if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
+
+
 ## copy config to output
 cp $defuse_config $config_txt
 ## make a data_dir  and ln -s the input fastq
@@ -549,7 +553,7 @@
 mkdir -p output_dir
 #end if
 ## run defuse.pl
-perl $defuse -c $defuse_config -d data_dir -o output_dir  -p 8
+perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -d data_dir -o output_dir  -p 8
 ## copy primary results to output datasets
 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
 if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
--- a/tool-data/defuse.loc.sample	Fri Jan 06 16:06:17 2012 -0600
+++ b/tool-data/defuse.loc.sample	Fri Jan 04 13:29:03 2013 -0600
@@ -1,9 +1,11 @@
-## Configurstion info for prepared data references for DeFuse Version 0.4.3 
-#  http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+## Configuration info for prepared data references for DeFuse
 ## http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
 ## 3 columns separated by the TAB character
 ## The 3rd column has dictionary values that will be substituted in the config file for defuse
 ## It should likely contain keys:   dataset_directory gene_models genome_fasta repeats_filename est_fasta est_alignments unigene_fasta
+## If this is not a Homo_sapiens reference also need keys:  gene_id_pattern transcript_id_pattern chromosomes
+
 #db_key	name	{'config_key':'config_value'}
-hg19	GRCh37(hg19)	{'samtools_bin':'/soft/samtools/0.1.8/bin/samtools', 'dataset_directory':'/project/db/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
-mm9	NCBIM37(mm9)	{'samtools_bin':'/soft/samtools/0.1.8/bin/samtools', 'dataset_directory':'/project/db/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
+#hg19	GRCh37(hg19)	{'gene_id_pattern':'ENSG\d+', 'transcript_id_pattern':'ENST\d+', 'dataset_directory':'/data/genomes/Hsapiens/hg19/defuse', 'gene_models':'$(dataset_directory)/Homo_sapiens.GRCh37.62.gtf', 'genome_fasta':'$(dataset_directory)/Homo_sapiens.GRCh37.62.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Hs.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
+#mm9	NCBIM37(mm9)	{'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/data/genomes/Mmusculus/mm9/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM37.63.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM37.63.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'chromosomes':'1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
+#mm8	NCBIM36(mm8)	{'gene_id_pattern':'ENSMUSG\d+', 'transcript_id_pattern':'ENSMUST\d+', 'dataset_directory':'/data/genomes/Mmusculus/mm8/defuse', 'gene_models':'$(dataset_directory)/Mus_musculus.NCBIM36.46.gtf', 'genome_fasta':'$(dataset_directory)/Mus_musculus.NCBIM36.46.dna.chromosome.fa', 'repeats_filename':'$(dataset_directory)/rmsk.txt', 'est_fasta':'$(dataset_directory)/est.fa', 'est_alignments':'$(dataset_directory)/intronEst.txt', 'unigene_fasta':'$(dataset_directory)/Mm.seq.uniq', 'mt_chromosome':'MT', 'gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding', 'ig_gene_sources':'IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene', 'rrna_gene_sources':'Mt_rRNA,rRNA,rRNA_pseudogene'}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Jan 04 13:29:03 2013 -0600
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="defuse" version="0.5.0">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://sourceforge.net/projects/defuse/files/defuse/0.5/defuse-0.5.0.tar.gz</action>
+                <action type="shell_command">cd tools &amp;&amp; make</action>
+                <action type="move_directory_files">
+                    <source_directory>.</source_directory>
+                    <destination_directory>$INSTALL_DIR</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="DEFUSE_PATH" action="set_to">$INSTALL_DIR</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        </readme>
+    </package>
+
+</tool_dependency>