changeset 0:8888e4e3f169

Migrated tool version 0.1 from old tool shed archive to new tool shed repository
author fubar
date Tue, 07 Jun 2011 17:05:43 -0400
parents
children cf59a9ae0efe
files rgclustal/README rgclustal/rgClustal_testin.fasta rgclustal/rgClustal_testout.fasta rgclustal/rgClustal_testout.log rgclustal/rgClustalw.xml
diffstat 5 files changed, 359 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgclustal/README	Tue Jun 07 17:05:43 2011 -0400
@@ -0,0 +1,49 @@
+This is a wrapper for ClustalW.
+
+This tool allows you to align multiple sequences in Galaxy, using ClustalW2 with mostly default options, which should work reasonably well for many alignments.
+DNA or protein sequences can be aligned. The input file must be a fasta file in your current history.
+
+The alignments will appear as a clustal format file or optionally, as phylip or fasta format files in your history and a text log will be output to your history 
+showing the output Clustalw would normally write to standard output.
+
+If Clustal format is chosen, you have the option of adding residue numbers to the output.
+
+A subsequence of the alignment can be output by setting the "Output complete alignment" parameter to "only part of the alignment" and defining the start point and end point of the subsequence to be output.
+
+**Installation**
+
+Make sure clustalw2 is available on the path for all your nodes
+
+Move the test data files to your galaxy root test-data
+Move the xml file to a subdirectory of your tools folder (eg rgenetics/) and then add a line in your tool_conf.xml to point there.
+Run
+sh run_functional_tests.sh -id clustalw
+to make sure the tests work
+
+then restart Galaxy and you should be good to go.
+
+**Attribution**
+
+Clustal attribution and associated documentation are available at http://www.clustal.org
+
+An implementation of a Galaxy Clustal wrapper was written by Hans-Rudolf Hotz in an email on the developer list - 
+http://lists.bx.psu.edu/pipermail/galaxy-dev/2010-November/003732.html
+
+This version by Ross Lazarus for the rgenetics project builds on Hans-Rudolf's code, adding some additional controls and a log file. It also
+deals with stderr so that ClustalW2 writing there doesn't cause the job to error out. That's encoded in the tail of the command line.
+
+**License**
+
+Assuming Hans-Rudolf is ok with a new license for this derived work, this version of his wrapper is LGPL like other rgenetics artefacts
+
+Written by Ross Lazarus for the Rgenetics project
+
+Copyright Ross Lazarus at gmail com 2011
+
+All rights reserved.
+
+Released under the LGPL - see http://www.gnu.org/copyleft/lesser.html
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgclustal/rgClustal_testin.fasta	Tue Jun 07 17:05:43 2011 -0400
@@ -0,0 +1,25 @@
+>c_briggsae-chrII(+)/43862-46313
+ATGAGCTTCCACAAAAGCATGAGCTTTCTCAGCTTCTGCCACATCAGCATTCAAATGATC
+>c_remanei-Crem_Contig172(-)/123228-124941
+ATGAGCCTCTACAACCGCATGATTCTTTTCAGCCTCTGCCACGTCCGCATTCAAATGCTC
+>c_brenneri-Cbre_Contig60(+)/627772-630087
+ATGAGCCTCCACAACAGCATGATTTTTCTCGGCTTCCGCCACATCCGCATTCAAATGATC
+>c_elegans-II(+)/9706834-9708803
+ATGAGCCTCTACTACAGCATGATTCTTCTCAGCTTCTGCAACGTCAGCATTCAGATGATC
+>c_briggsae-chrIfooI(+)/43862-46313
+CGCACAAATATGATGCACAAATCCACAACCTAAAGCATCTCCGATAACGTTGACCGAAGT
+>c_remanei-Crem_Contig172foo(-)/123228-124941
+AGCACAAATGTAATGAACGAATCCGCATCCCAACGCATCGCCAATCACATTCACAGATGT
+>c_brenneri-Cbre_Contig60gak(+)/627772-630087
+CGCACAAATGTAGTGGACAAATCCGCATCCCAAAGCGTCTCCGATAACATTTACCGAAGT
+>c_elegans-II(+)more/9706834-9708803
+TGCACAAATGTGATGAACGAATCCACATCCCAATGCATCACCGATCACATTGACAGATGT
+>c_briggsae-chrII(+)bar/43862-46313
+CCGGAGTCGATCCCTGAAT-----------------------------------------
+>c_remanei-Crem_Contig172zot(-)/123228-124941
+ACGAAGTCGGTCCCTATAAGGTATGATTTTATATGA----TGTACCATAAGGAAATAGTC
+>c_brenneri-Cbre_Contig60fee(+)/627772-630087
+ACGAAGTCGATCCCTGAAA---------TCAGATGAGCGGTTGACCA---GAGAACAACC
+>c_elegans-II(+)meh/9706834-9708803
+ACGAAGTCGGTCCCTGAAC--AATTATTT----TGA----TATA---GAAAGAAACGGTA
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgclustal/rgClustal_testout.fasta	Tue Jun 07 17:05:43 2011 -0400
@@ -0,0 +1,48 @@
+>c_briggsae-chrII_+_
+---ATGAGCTTCCACAAAAGCATGAGCTTT
+CTCAGCTTCTGCCACATCAGCATTCAAATG
+ATC
+>c_brenneri-Cbre_Contig60_+_
+---ATGAGCCTCCACAACAGCATGATTTTT
+CTCGGCTTCCGCCACATCCGCATTCAAATG
+ATC
+>c_remanei-Crem_Contig172_-_
+---ATGAGCCTCTACAACCGCATGATTCTT
+TTCAGCCTCTGCCACGTCCGCATTCAAATG
+CTC
+>c_elegans-II_+_
+---ATGAGCCTCTACTACAGCATGATTCTT
+CTCAGCTTCTGCAACGTCAGCATTCAGATG
+ATC
+>c_briggsae-chrII_+_bar
+---CCGGAGTCGATCCCTGAAT--------
+------------------------------
+---
+>c_brenneri-Cbre_Contig60fee_+_
+---ACGAAGTCGATCCCTGAAA--------
+-TCAGATGAGCGGTTGACCA---GAGAACA
+ACC
+>c_remanei-Crem_Contig172zot_-_
+---ACGAAGTCGGTCCCTATAAGGTATGAT
+TTTATATGA----TGTACCATAAGGAAATA
+GTC
+>c_elegans-II_+_meh
+---ACGAAGTCGGTCCCTGAAC--AATTAT
+TT----TGA----TATA---GAAAGAAACG
+GTA
+>c_briggsae-chrIfooI_+_
+CGCACAAATATGATGCACAAATCCACAACC
+TAAAGCATCTCCGATAACGTTGACCGAAGT
+---
+>c_brenneri-Cbre_Contig60gak_+_
+CGCACAAATGTAGTGGACAAATCCGCATCC
+CAAAGCGTCTCCGATAACATTTACCGAAGT
+---
+>c_remanei-Crem_Contig172foo_-_
+AGCACAAATGTAATGAACGAATCCGCATCC
+CAACGCATCGCCAATCACATTCACAGATGT
+---
+>c_elegans-II_+_more
+TGCACAAATGTGATGAACGAATCCACATCC
+CAATGCATCACCGATCACATTGACAGATGT
+---
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgclustal/rgClustal_testout.log	Tue Jun 07 17:05:43 2011 -0400
@@ -0,0 +1,112 @@
+
+
+
+ CLUSTAL 2.1 Multiple Sequence Alignments
+
+
+Sequence type explicitly set to DNA
+Sequence format is Pearson
+Sequence 1: c_briggsae-chrII_+_/43862-46313                 60 bp
+Sequence 2: c_remanei-Crem_Contig172_-_/123228-124941       60 bp
+Sequence 3: c_brenneri-Cbre_Contig60_+_/627772-630087       60 bp
+Sequence 4: c_elegans-II_+_/9706834-9708803                 60 bp
+Sequence 5: c_briggsae-chrIfooI_+_/43862-46313              60 bp
+Sequence 6: c_remanei-Crem_Contig172foo_-_/123228-124941    60 bp
+Sequence 7: c_brenneri-Cbre_Contig60gak_+_/627772-630087    60 bp
+Sequence 8: c_elegans-II_+_more/9706834-9708803             60 bp
+Sequence 9: c_briggsae-chrII_+_bar/43862-46313              60 bp
+Sequence 10: c_remanei-Crem_Contig172zot_-_/123228-124941    60 bp
+Sequence 11: c_brenneri-Cbre_Contig60fee_+_/627772-630087    60 bp
+Sequence 12: c_elegans-II_+_meh/9706834-9708803              60 bp
+Start of Pairwise alignments
+Aligning...
+
+Sequences (1:2) Aligned. Score:  80
+Sequences (1:3) Aligned. Score:  88
+Sequences (1:4) Aligned. Score:  83
+Sequences (1:5) Aligned. Score:  21
+Sequences (1:6) Aligned. Score:  20
+Sequences (1:7) Aligned. Score:  23
+Sequences (1:8) Aligned. Score:  18
+Sequences (1:9) Aligned. Score:  21
+Sequences (1:10) Aligned. Score:  16
+Sequences (1:11) Aligned. Score:  25
+Sequences (1:12) Aligned. Score:  10
+Sequences (2:3) Aligned. Score:  85
+Sequences (2:4) Aligned. Score:  86
+Sequences (2:5) Aligned. Score:  21
+Sequences (2:6) Aligned. Score:  20
+Sequences (2:7) Aligned. Score:  25
+Sequences (2:8) Aligned. Score:  20
+Sequences (2:9) Aligned. Score:  36
+Sequences (2:10) Aligned. Score:  16
+Sequences (2:11) Aligned. Score:  22
+Sequences (2:12) Aligned. Score:  17
+Sequences (3:4) Aligned. Score:  85
+Sequences (3:5) Aligned. Score:  13
+Sequences (3:6) Aligned. Score:  20
+Sequences (3:7) Aligned. Score:  25
+Sequences (3:8) Aligned. Score:  20
+Sequences (3:9) Aligned. Score:  36
+Sequences (3:10) Aligned. Score:  16
+Sequences (3:11) Aligned. Score:  18
+Sequences (3:12) Aligned. Score:  25
+Sequences (4:5) Aligned. Score:  13
+Sequences (4:6) Aligned. Score:  11
+Sequences (4:7) Aligned. Score:  20
+Sequences (4:8) Aligned. Score:  10
+Sequences (4:9) Aligned. Score:  31
+Sequences (4:10) Aligned. Score:  17
+Sequences (4:11) Aligned. Score:  29
+Sequences (4:12) Aligned. Score:  14
+Sequences (5:6) Aligned. Score:  73
+Sequences (5:7) Aligned. Score:  83
+Sequences (5:8) Aligned. Score:  80
+Sequences (5:9) Aligned. Score:  31
+Sequences (5:10) Aligned. Score:  14
+Sequences (5:11) Aligned. Score:  14
+Sequences (5:12) Aligned. Score:  12
+Sequences (6:7) Aligned. Score:  80
+Sequences (6:8) Aligned. Score:  88
+Sequences (6:9) Aligned. Score:  26
+Sequences (6:10) Aligned. Score:  16
+Sequences (6:11) Aligned. Score:  25
+Sequences (6:12) Aligned. Score:  12
+Sequences (7:8) Aligned. Score:  78
+Sequences (7:9) Aligned. Score:  31
+Sequences (7:10) Aligned. Score:  10
+Sequences (7:11) Aligned. Score:  12
+Sequences (7:12) Aligned. Score:  12
+Sequences (8:9) Aligned. Score:  31
+Sequences (8:10) Aligned. Score:  10
+Sequences (8:11) Aligned. Score:  14
+Sequences (8:12) Aligned. Score:  12
+Sequences (9:10) Aligned. Score:  63
+Sequences (9:11) Aligned. Score:  84
+Sequences (9:12) Aligned. Score:  78
+Sequences (10:11) Aligned. Score:  64
+Sequences (10:12) Aligned. Score:  76
+Sequences (11:12) Aligned. Score:  46
+Guide tree file created:   [/share/shared/galaxy/database/files/002/dataset_2705.dnd]
+
+There are 11 groups
+Start of Multiple Alignment
+
+Aligning...
+Group 1: Sequences:   2      Score:1045
+Group 2: Sequences:   2      Score:1016
+Group 3: Sequences:   4      Score:1001
+Group 4: Sequences:   2      Score:313
+Group 5: Sequences:   2      Score:731
+Group 6: Sequences:   4      Score:516
+Group 7: Sequences:   8      Score:344
+Group 8: Sequences:   2      Score:1016
+Group 9: Sequences:   2      Score:1054
+Group 10: Sequences:   4      Score:945
+Group 11: Sequences:  12      Score:380
+Alignment Score 6283
+firstres = 1 lastres = 63
+FASTA file created!
+
+Fasta-Alignment file created    [/share/shared/galaxy/database/files/002/dataset_2726.dat]
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgclustal/rgClustalw.xml	Tue Jun 07 17:05:43 2011 -0400
@@ -0,0 +1,125 @@
+<tool id="clustalw" name="ClustalW" version="0.1">
+   <description>multiple sequence alignment program for DNA or proteins</description>
+   <command> 
+    #if   ($range.mode=="part")
+    clustalw2 -infile=$input -outfile=$output -OUTORDER=$out_order -RANGE=$range.seq_range_start,$range.seq_range_end
+    #elif ($range.mode=="complete")
+    clustalw2 -infile=$input -outfile=$output -OUTORDER=$out_order 
+    #end if
+    #if ($outcontrol.outform=="clustal")
+    -SEQNOS=$outcontrol.out_seqnos
+    #end if
+    #if ($outcontrol.outform=="phylip")
+    -OUTPUT=PHYLIP
+    #end if
+    #if ($outcontrol.outform=="fasta")
+    -OUTPUT=FASTA
+    #end if
+    -TYPE=$dnarna 1&gt;$outlog  2&gt;&amp;1
+   </command>
+  <inputs>
+   <page>
+    <param format="fasta" name="input" type="data" label="Fasta File" />
+    <param name="outname" label="Name for output files to make it easy to remember what you did" type="text" size="50" value="Clustal_run" />
+    <param name="dnarna" type="select" label="Data Type">
+      <option value="DNA" selected="True">DNA nucleotide sequences</option>
+      <option value="PROTEIN">Protein sequences</option>
+    </param>
+    <conditional name="outcontrol">
+      <param name="outform" type="select" label="Output alignment format">
+        <option value="clustal" selected="True">Native Clustal output format</option>
+        <option value="phylip">Phylip format</option>
+        <option value="fasta">Fasta format</option>
+      </param>
+      <when value="fasta" />
+      <when value="phylip" />
+      <when value="clustal">
+       <param name="out_seqnos" type="select" label="Show residue numbers in clustal format output">
+         <option value="ON">yes</option>
+         <option value="OFF" selected="true">no</option>
+       </param>
+      </when>
+    </conditional>
+    <param name="out_order" type="select" label="Output Order">
+      <option value="ALIGNED">aligned</option>
+      <option value="INPUT">same order as input file</option>
+    </param>
+
+    <conditional name="range">
+        <param name="mode" type="select" label="Output complete alignment (or specify part to output)">
+          <option value="complete">complete alignment</option>
+          <option value="part">only part of the alignment</option>
+        </param>
+        <when value="complete">
+        </when>
+        <when value="part">    
+           <param name="seq_range_start" size="5" type="integer" value="1" label="start point" help="sequence range to write">
+           </param>
+           <param name="seq_range_end" size="5" type="integer" value="99999" label="end point" >
+           </param> 
+        </when>
+    </conditional>
+   </page>
+  </inputs>
+  <outputs>
+    <data format="clustal" name="output"  label="${outname}_output.${outcontrol.outform}"> <!-- alignment file; datatype is clustal unless switched below -->
+       <change_format> <!-- datatype follows the selected outcontrol.outform value -->
+           <when input="outcontrol.outform" value="phylip" format="phylip" />
+           <when input="outcontrol.outform" value="fasta" format="fasta" />
+       </change_format>
+    </data>
+    <data format="txt" name="outlog"  label="${outname}_clustal_log.txt"/> <!-- receives the stdout and stderr that the command redirects to $outlog -->
+  </outputs>
+  <tests>
+     <test> <!-- DNA input, complete alignment, aligned order, FASTA output; checks both declared outputs -->
+        <param name="input" value="rgClustal_testin.fasta" />
+        <param name="outname" value="" />
+        <param name="outform" value="fasta" />
+        <param name="dnarna" value="DNA" />
+        <param name="mode" value="complete" />
+        <param name="out_order" value="ALIGNED" />
+        <output name="output" file="rgClustal_testout.fasta" ftype="fasta" />
+        <output name="outlog" file="rgClustal_testout.log" ftype="txt" lines_diff="5" /> <!-- the log fixture embeds dataset paths, hence lines_diff -->
+     </test>
+  </tests>
+  <help>
+
+**Note**
+
+This tool allows you to run a multiple sequence alignment with ClustalW2 (see Clustsrc_) using the default options.
+ 
+For a tutorial introduction, see ClustalW2_
+
+You can align DNA or protein sequences in the input file which should be multiple sequences to be aligned in a fasta file
+
+A log will be output to your history showing the output Clustal would normally write to standard output.
+
+The alignments will appear as a clustal format file or optionally, as phylip or fasta format files in your history. If you choose fasta as 
+the output format, you can create a 'Logo' image using the Sequence Logo tool.
+
+If Clustal format is chosen, you have the option of adding residue numbers to the output.
+
+A subsequence of the alignment can be output by setting the "Output complete alignment" parameter to "only part of the alignment" and defining the start point and end point of the subsequence to be output.
+
+----
+
+**Attribution**
+
+Clustal attribution and associated documentation are available at Clustsrc_
+
+The first iteration of this Galaxy wrapper was written by Hans-Rudolf Hotz - see Clustfirst_
+
+It was modified by Ross Lazarus for the rgenetics project
+
+This wrapper is now LGPL
+
+.. _ClustalW2: http://www.ebi.ac.uk/2can/tutorials/protein/clustalw.html  
+
+.. _Clustsrc: http://www.clustal.org
+
+.. _Clustfirst: http://lists.bx.psu.edu/pipermail/galaxy-dev/2010-November/003732.html
+
+  </help>
+
+</tool>
+