changeset 3:c90022a13c7c

DeFuse - Allow the user to save the workspace and create an HTML file with links to the files in the workspace. Allow the creation of an HTML-formatted results.filtered.tsv with links to the cluster detail provided by the get_reads.pl command.
author Jim Johnson <jj@umn.edu>
date Fri, 06 Jan 2012 16:06:17 -0600
parents 4245c2b047de
children 679a5c7b1294
files defuse.xml
diffstat 1 files changed, 99 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/defuse.xml	Tue Nov 08 13:56:35 2011 -0600
+++ b/defuse.xml	Fri Jan 06 16:06:17 2012 -0600
@@ -1,18 +1,9 @@
-<tool id="defuse" name="DeFuse" version="1.1">
+<tool id="defuse" name="DeFuse" version="1.2">
  <description>identify fusion transcripts</description>
  <requirements>
   <requirement type="binary"></requirement>
  </requirements>
- <command interpreter="perl">
-  ## Find the defuse.pl in the galaxy tool path
-  #import Cheetah.FileUtils
-  #set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
-  #set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
-  $defuse
-  -c `cp $defuse_config $config_txt; echo $defuse_config`
-  -d `mkdir -p data_dir; ln -s $left_pairendreads data_dir/reads_1.fastq; ln -s $right_pairendreads data_dir/reads_2.fastq; echo data_dir`
-  -o  output_dir -p 8
- </command>
+  <command interpreter="command"> /bin/bash $shscript </command>
  <inputs>
   <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads.  (FASTQ interlacer will pair reads and remove the unpaired.   FASTQ de-interlacer will separate the result into left and right reads.)"/>
   <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
@@ -77,6 +68,8 @@
         <param name="config" type="data" format="txt" label="Defuse Config file" help=""/>
       </when>  <!-- history -->
   </conditional>  <!-- refGenomeSource -->
+  <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files"/>
+  <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
  </inputs>
  <configfiles>
   <configfile name="defuse_config">
@@ -243,7 +236,7 @@
 #try
 $ref_dict['bowtie_threads']
 #except
-1
+4
 #end try
 bowtie_quals = #slurp
 #try
@@ -502,13 +495,103 @@
 #end if
 
   </configfile>
+  <configfile name="shscript">
+#!/bin/bash
+## Cheetah helpers: ds, gt and lt hold the dollar, greater-than and less-than characters (chr 36, 62, 60) so the shell and awk text below can emit them literally from inside this XML/Cheetah template; echo_cmd is set but never referenced in this configfile
+#set $ds = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $echo_cmd = 'echo'
+## Locate defuse.pl and get_reads.pl under the tools/defuse directory of the Galaxy root, taking the first match returned by Cheetah.FileUtils.findFiles (NOTE(review): the [0] index presumably raises if nothing is found - confirm)
+#import Cheetah.FileUtils
+#set $toolpath = '/'.join([$__root_dir__,'tools','defuse'])
+#set $defuse = $Cheetah.FileUtils.findFiles($toolpath,['defuse.pl'],[],['tools','external','include','em','data'])[0]
+#set $get_reads = $Cheetah.FileUtils.findFiles($toolpath,['get_reads.pl'],[],['tools','external','include','em','data'])[0]
+## results2html RESULTS_TSV HTML_OUT [READS_DIR] - bash function that renders a deFuse results tsv as an HTML table in HTML_OUT.
+## With READS_DIR, each cluster_id cell links to cluster_ID_reads.txt and the get_reads.pl output for every cluster is written into READS_DIR.
+## awk splits on tabs (-F) so empty or space-containing tsv cells stay in their own column; data rows are those whose first field is all digits.
+results2html() {
+  rlts="${ds}1"
+  rslt_name=`basename "${ds}rlts"`
+  html="${ds}2"
+  efp="${ds}3"
+  echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse '"${ds}rslt_name"'${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} "${ds}html"
+  echo '${lt}h2${gt}Defuse '"${ds}rslt_name"'${lt}/h2${gt}${lt}table${gt}' ${gt}${gt} "${ds}html"
+  ## one awk pass renders the header row and the data rows; links is non-empty only when a reads directory was requested
+  awk -F'\t' -v links="${ds}efp" '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}
+       ${ds}1 ~ /^[0-9]+${ds}/{printf("${lt}tr${gt}");
+        if (links != "") {printf("${lt}td${gt}${lt}a href=\"cluster_%s_reads.txt\"${gt}%s${lt}/a${gt}${lt}/td${gt}", ${ds}1, ${ds}1);} else {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}1);}
+        for (i = 2; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' "${ds}rlts" ${gt}${gt} "${ds}html"
+  echo '${lt}/table${gt}' ${gt}${gt} "${ds}html"
+  echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} "${ds}html"
+  ## fetch the reads behind every cluster so the links written above resolve; _EFP stays exported as before in case a child process reads it
+  if [ -n "${ds}efp" ]
+  then
+    export _EFP="${ds}efp"
+    mkdir -p "${ds}_EFP"
+    for i in `awk -F'\t' '${ds}1 ~ /^[0-9]+${ds}/{print ${ds}1}' "${ds}rlts"`
+    do
+      perl $get_reads -c $defuse_config -o output_dir -i "${ds}i" ${gt} "${ds}_EFP/cluster_${ds}{i}_reads.txt"
+    done
+  fi
+}
+## copy the generated deFuse config into the config_txt output dataset
+cp $defuse_config $config_txt
+## make a data_dir and symlink the paired input fastq files into it as reads_1.fastq and reads_2.fastq
+mkdir -p data_dir
+ln -s $left_pairendreads data_dir/reads_1.fastq
+ln -s $right_pairendreads data_dir/reads_2.fastq
+## when the defuse_out dataset is requested, make output_dir a symlink to its extra_files_path so the deFuse working files land there; otherwise use a plain directory
+#if $defuse_out.__str__ != 'None':
+mkdir -p $defuse_out.extra_files_path
+ln -s $defuse_out.extra_files_path  output_dir
+#else
+mkdir -p output_dir
+#end if
+## run defuse.pl on data_dir, writing to output_dir (-p 8 is hard-coded here; presumably the parallel job count - confirm against defuse.pl)
+perl $defuse -c $defuse_config -d data_dir -o output_dir  -p 8
+## copy each primary result that defuse.pl produced to its output dataset; files that do not exist are skipped silently
+if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
+if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
+if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
+if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
+## write defuse_out as an html index linking every regular file at the top level of its extra_files_path (find -maxdepth 1 does not descend into subdirectories)
+#if $defuse_out.__str__ != 'None':
+if [ -e $defuse_out ]
+then
+  echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out
+  echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt}  $defuse_out
+  pushd $defuse_out.extra_files_path
+  for f in `find -L . -maxdepth 1 -type f`; 
+   do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt}  $defuse_out; 
+  done
+  popd
+  echo '${lt}/ul${gt}' ${gt}${gt} $defuse_out
+  echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt}  $defuse_out
+fi
+#end if
+## when the fusion_reads dataset is requested, render results.filtered.tsv as html with per-cluster links and run get_reads.pl on each cluster via results2html
+#if $fusion_reads.__str__ != 'None':
+if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] 
+then
+  mkdir -p $fusion_reads.extra_files_path
+  results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path
+fi
+#end if
+  </configfile>
  </configfiles>
  <outputs>
   <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
-  <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" from_work_dir="output_dir/log/defuse.log"/>
-  <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" from_work_dir="output_dir/results.tsv"/>
-  <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" from_work_dir="output_dir/results.filtered.tsv"/>
-  <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" from_work_dir="output_dir/results.classify.tsv"/>
+  <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" />
+  <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output">
+    <filter>keep_output == True</filter>
+  </data>
+  <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
+    <filter>do_get_reads == True</filter>
+  </data>
+  <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" />
+  <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
+  <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
  </outputs>
  <tests>
  </tests>