Mercurial > repos > devteam > clustalw
changeset 0:7cc64024fe92 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:28:58 -0500 |
parents | |
children | 973f9ca114fb |
files | rgClustalw.py rgClustalw.xml test-data/rgClustal_testin.fasta test-data/rgClustal_testout.fasta test-data/rgClustal_testout.log tool_dependencies.xml |
diffstat | 6 files changed, 414 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# File: rgClustalw.py (added in this changeset)
"""
rgclustalw.py
wrapper for clustalw necessitated by bad choice of output path for .dnd file based on input file. Naughty.
Copyright ross lazarus march 2011
All rights reserved
Licensed under the LGPL
"""

import sys
import optparse
import os
import subprocess
import tempfile
import shutil


class Clustrunner:
    """Run clustalw2 on a private copy of the input file.

    clustalw2 writes its guide tree (.dnd) next to the input file, so the
    input is first copied into the current working directory under a fixed
    name; the .dnd file then appears there as well and can be appended to
    the log.

    ``opts`` is the options object produced by the option parser in
    ``main()``; the attributes read are ``input``, ``output``, ``outlog``,
    ``out_order``, ``dnarna``, ``outform``, ``seq_range_start``,
    ``seq_range_end`` and ``outseqnos``.
    """

    def __init__(self, opts=None):
        self.opts = opts
        # Fixed name, relative to the current working directory.
        self.iname = 'infile_copy'
        shutil.copy(self.opts.input, self.iname)

    def _build_command(self):
        """Return the clustalw2 invocation as an argument list."""
        opts = self.opts
        cl = [
            'clustalw2',
            '-INFILE=%s' % self.iname,
            '-OUTFILE=%s' % opts.output,
            '-OUTORDER=%s' % opts.out_order,
            '-TYPE=%s' % opts.dnarna,
            '-OUTPUT=%s' % opts.outform,
        ]
        # A range is only passed when both ends were supplied.
        if opts.seq_range_end is not None and opts.seq_range_start is not None:
            cl.append('-RANGE=%s,%s' % (opts.seq_range_start, opts.seq_range_end))
        # Residue numbering is only requested for native clustal output.
        if opts.outform == 'CLUSTAL' and opts.outseqnos is not None:
            cl.append('-SEQNOS=ON')
        return cl

    def run(self):
        """Run clustalw2, sending its stdout/stderr to the log file.

        If a .dnd guide tree file was produced it is appended to the log.
        The temporary copy of the input is always removed afterwards.

        Returns the exit status of clustalw2 (127 if it could not be
        started at all).
        """
        cl = self._build_command()
        dndf = '%s.dnd' % self.iname
        try:
            with open(self.opts.outlog, 'w') as tlf:
                try:
                    # Argument list, no shell: paths containing spaces or
                    # shell metacharacters are passed through untouched.
                    rval = subprocess.call(cl, stderr=tlf, stdout=tlf)
                except OSError as err:
                    # Executable missing or not runnable; record it in the
                    # log, mirroring the shell's "command not found" status.
                    tlf.write('Unable to run clustalw2: %s\n' % err)
                    rval = 127
                if os.path.exists(dndf):
                    tlf.write('\nClustal created the following dnd file for your information:\n')
                    with open(dndf, 'r') as dnd:
                        for row in dnd:
                            tlf.write(row)
                    tlf.write('\n')
        finally:
            if os.path.exists(self.iname):
                os.unlink(self.iname)
        return rval


def main():
    """Parse the command line, run clustalw2 and exit with its status."""
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-o', '--output', default=None)
    # NOTE: --outname is accepted for compatibility with the tool XML but is
    # not used by this script.
    op.add_option('-t', '--outname', default="rgClustal")
    # NOTE(review): the tool XML only ever passes ALIGNED or INPUT here; the
    # 'ALIGNMENT' default is kept as-is but looks unintended - confirm
    # against the clustalw2 -OUTORDER documentation.
    op.add_option('-s', '--out_order', default='ALIGNMENT')
    op.add_option('-f', '--outform', default='CLUSTAL')
    op.add_option('-e', '--seq_range_end', default=None)
    op.add_option('-b', '--seq_range_start', default=None)
    op.add_option('-l', '--outlog', default='rgClustalw.log')
    op.add_option('-q', '--outseqnos', default=None)
    op.add_option('-d', '--dnarna', default='DNA')

    opts, args = op.parse_args()
    # Explicit checks instead of assert, which is stripped under -O.
    if opts.input is None:
        op.error('an input file is required (-i/--input)')
    if not os.path.isfile(opts.input):
        op.error('input file %s does not exist' % opts.input)
    c = Clustrunner(opts)
    sys.exit(c.run())


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgClustalw.xml Mon Jan 27 09:28:58 2014 -0500 @@ -0,0 +1,131 @@ +<tool id="clustalw" name="ClustalW" version="0.1"> + <requirements> + <requirement type="package" version="2.1">clustalw2</requirement> + </requirements> + <description>multiple sequence alignment program for DNA or proteins</description> + <command interpreter="python"> + rgClustalw.py -i "$input" -o "$output" -s "$out_order" -l "$outlog" -t "$outname" -d "$dnarna" + #if ($range.mode=="part") +-b "$range.seq_range_start" -e "$range.seq_range_end" + #end if + #if ($outcontrol.outform=="clustal") +-f "CLUSTAL" + #if ($outcontrol.out_seqnos=="ON") +-q "ON" + #end if + #end if + #if ($outcontrol.outform=="phylip") +-f "PHYLIP" + #end if + #if ($outcontrol.outform=="fasta") +-f "FASTA" + #end if + </command> + <inputs> + <page> + <param format="fasta" name="input" type="data" label="Fasta File" /> + <param name="outname" label="Name for output files to make it easy to remember what you did" type="text" size="50" value="Clustal_run" /> + <param name="dnarna" type="select" label="Data Type"> + <option value="DNA" selected="True">DNA nucleotide sequences</option> + <option value="PROTEIN">Protein sequences</option> + </param> + <conditional name="outcontrol"> + <param name="outform" type="select" label="Output alignment format"> + <option value="clustal" selected="True">Native Clustal output format</option> + <option value="phylip">Phylip format</option> + <option value="fasta">Fasta format</option> + </param> + <when value="fasta" /> + <when value="phylip" /> + <when value="clustal"> + <param name="out_seqnos" type="select" label="Show residue numbers in clustal format output"> + <option value="ON">yes</option> + <option value="OFF" selected="true">no</option> + </param> + </when> + </conditional> + <param name="out_order" type="select" label="Output Order"> + <option value="ALIGNED">aligned</option> + <option value="INPUT">same order as input file</option> 
+ </param> + + <conditional name="range"> + <param name="mode" type="select" label="Output complete alignment (or specify part to output)"> + <option value="complete">complete alignment</option> + <option value="part">only part of the alignment</option> + </param> + <when value="complete"> + </when> + <when value="part"> + <param name="seq_range_start" size="5" type="integer" value="1" label="start point" help="sequence range to write"> + </param> + <param name="seq_range_end" size="5" type="integer" value="99999" label="end point" > + </param> + </when> + </conditional> + </page> + </inputs> + <outputs> + <data format="clustal" name="output" label="${outname}_output.${outcontrol.outform}"> + <change_format> + <when input="outcontrol.outform" value="phylip" format="phylip" /> + <when input="outcontrol.outform" value="fasta" format="fasta" /> + </change_format> + </data> + <data format="txt" name="outlog" label="${outname}_clustal_log.txt"/> + </outputs> + <tests> + <test> + <param name="input" value="rgClustal_testin.fasta" /> + <param name="outname" value="" /> + <param name="outform" value="fasta" /> + <param name="dnarna" value="DNA" /> + <param name="mode" value="complete" /> + <param name="out_order" value="ALIGNED" /> + <output name="output" file="rgClustal_testout.fasta" ftype="fasta" /> + <output name="outlog" file="rgClustal_testout.log" ftype="txt" lines_diff="5" /> + </test> + </tests> + <help> + +**Note** + +This tool allows you to run a multiple sequence alignment with ClustalW2 (see Clustsrc_) using the default options. + +For a tutorial introduction, see ClustalW2_ + +You can align DNA or protein sequences in the input file which should be multiple sequences to be aligned in a fasta file + +A log will be output to your history showing the output Clustal would normally write to standard output. + +The alignments will appear as a clustal format file or optionally, as phylip or fasta format files in your history. 
If you choose fasta as +the output format, you can create a 'Logo' image using the Sequence Logo tool. + +If Clustal format is chosen, you have the option of adding residue numbers to the output + +A subsequence of the alignment can be output by setting the "Output complete alignment" parameter to "only part of the alignment" and defining the start point and end point of the subsequence to be output + +---- + +**Attribution** + +Clustal attribution and associated documentation are available at Clustsrc_ + +The first iteration of this Galaxy wrapper was written by Hans-Rudolf Hotz - see Clustfirst_ + +It was modified by Ross Lazarus for the rgenetics project - tests and some additional parameters were added + +This wrapper is released under the LGPL_ + +.. _ClustalW2: http://www.ebi.ac.uk/2can/tutorials/protein/clustalw.html + +.. _Clustsrc: http://www.clustal.org + +.. _Clustfirst: http://lists.bx.psu.edu/pipermail/galaxy-dev/2010-November/003732.html + +.. _LGPL: http://www.gnu.org/copyleft/lesser.html + + </help> + +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rgClustal_testin.fasta Mon Jan 27 09:28:58 2014 -0500 @@ -0,0 +1,25 @@ +>c_briggsae-chrII(+)/43862-46313 +ATGAGCTTCCACAAAAGCATGAGCTTTCTCAGCTTCTGCCACATCAGCATTCAAATGATC +>c_remanei-Crem_Contig172(-)/123228-124941 +ATGAGCCTCTACAACCGCATGATTCTTTTCAGCCTCTGCCACGTCCGCATTCAAATGCTC +>c_brenneri-Cbre_Contig60(+)/627772-630087 +ATGAGCCTCCACAACAGCATGATTTTTCTCGGCTTCCGCCACATCCGCATTCAAATGATC +>c_elegans-II(+)/9706834-9708803 +ATGAGCCTCTACTACAGCATGATTCTTCTCAGCTTCTGCAACGTCAGCATTCAGATGATC +>c_briggsae-chrIfooI(+)/43862-46313 +CGCACAAATATGATGCACAAATCCACAACCTAAAGCATCTCCGATAACGTTGACCGAAGT +>c_remanei-Crem_Contig172foo(-)/123228-124941 +AGCACAAATGTAATGAACGAATCCGCATCCCAACGCATCGCCAATCACATTCACAGATGT +>c_brenneri-Cbre_Contig60gak(+)/627772-630087 +CGCACAAATGTAGTGGACAAATCCGCATCCCAAAGCGTCTCCGATAACATTTACCGAAGT +>c_elegans-II(+)more/9706834-9708803 +TGCACAAATGTGATGAACGAATCCACATCCCAATGCATCACCGATCACATTGACAGATGT +>c_briggsae-chrII(+)bar/43862-46313 +CCGGAGTCGATCCCTGAAT----------------------------------------- +>c_remanei-Crem_Contig172zot(-)/123228-124941 +ACGAAGTCGGTCCCTATAAGGTATGATTTTATATGA----TGTACCATAAGGAAATAGTC +>c_brenneri-Cbre_Contig60fee(+)/627772-630087 +ACGAAGTCGATCCCTGAAA---------TCAGATGAGCGGTTGACCA---GAGAACAACC +>c_elegans-II(+)meh/9706834-9708803 +ACGAAGTCGGTCCCTGAAC--AATTATTT----TGA----TATA---GAAAGAAACGGTA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rgClustal_testout.fasta Mon Jan 27 09:28:58 2014 -0500 @@ -0,0 +1,48 @@ +>c_briggsae-chrII_+_ +---ATGAGCTTCCACAAAAGCATGAGCTTT +CTCAGCTTCTGCCACATCAGCATTCAAATG +ATC +>c_brenneri-Cbre_Contig60_+_ +---ATGAGCCTCCACAACAGCATGATTTTT +CTCGGCTTCCGCCACATCCGCATTCAAATG +ATC +>c_remanei-Crem_Contig172_-_ +---ATGAGCCTCTACAACCGCATGATTCTT +TTCAGCCTCTGCCACGTCCGCATTCAAATG +CTC +>c_elegans-II_+_ +---ATGAGCCTCTACTACAGCATGATTCTT +CTCAGCTTCTGCAACGTCAGCATTCAGATG +ATC +>c_briggsae-chrII_+_bar +---CCGGAGTCGATCCCTGAAT-------- +------------------------------ +--- +>c_brenneri-Cbre_Contig60fee_+_ +---ACGAAGTCGATCCCTGAAA-------- +-TCAGATGAGCGGTTGACCA---GAGAACA +ACC +>c_remanei-Crem_Contig172zot_-_ +---ACGAAGTCGGTCCCTATAAGGTATGAT +TTTATATGA----TGTACCATAAGGAAATA +GTC +>c_elegans-II_+_meh +---ACGAAGTCGGTCCCTGAAC--AATTAT +TT----TGA----TATA---GAAAGAAACG +GTA +>c_briggsae-chrIfooI_+_ +CGCACAAATATGATGCACAAATCCACAACC +TAAAGCATCTCCGATAACGTTGACCGAAGT +--- +>c_brenneri-Cbre_Contig60gak_+_ +CGCACAAATGTAGTGGACAAATCCGCATCC +CAAAGCGTCTCCGATAACATTTACCGAAGT +--- +>c_remanei-Crem_Contig172foo_-_ +AGCACAAATGTAATGAACGAATCCGCATCC +CAACGCATCGCCAATCACATTCACAGATGT +--- +>c_elegans-II_+_more +TGCACAAATGTGATGAACGAATCCACATCC +CAATGCATCACCGATCACATTGACAGATGT +---
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rgClustal_testout.log Mon Jan 27 09:28:58 2014 -0500 @@ -0,0 +1,144 @@ + + + + CLUSTAL 2.1 Multiple Sequence Alignments + + +Sequence type explicitly set to DNA +Sequence format is Pearson +Sequence 1: c_briggsae-chrII_+_/43862-46313 60 bp +Sequence 2: c_remanei-Crem_Contig172_-_/123228-124941 60 bp +Sequence 3: c_brenneri-Cbre_Contig60_+_/627772-630087 60 bp +Sequence 4: c_elegans-II_+_/9706834-9708803 60 bp +Sequence 5: c_briggsae-chrIfooI_+_/43862-46313 60 bp +Sequence 6: c_remanei-Crem_Contig172foo_-_/123228-124941 60 bp +Sequence 7: c_brenneri-Cbre_Contig60gak_+_/627772-630087 60 bp +Sequence 8: c_elegans-II_+_more/9706834-9708803 60 bp +Sequence 9: c_briggsae-chrII_+_bar/43862-46313 60 bp +Sequence 10: c_remanei-Crem_Contig172zot_-_/123228-124941 60 bp +Sequence 11: c_brenneri-Cbre_Contig60fee_+_/627772-630087 60 bp +Sequence 12: c_elegans-II_+_meh/9706834-9708803 60 bp +Start of Pairwise alignments +Aligning... + +Sequences (1:2) Aligned. Score: 80 +Sequences (1:3) Aligned. Score: 88 +Sequences (1:4) Aligned. Score: 83 +Sequences (1:5) Aligned. Score: 21 +Sequences (1:6) Aligned. Score: 20 +Sequences (1:7) Aligned. Score: 23 +Sequences (1:8) Aligned. Score: 18 +Sequences (1:9) Aligned. Score: 21 +Sequences (1:10) Aligned. Score: 16 +Sequences (1:11) Aligned. Score: 25 +Sequences (1:12) Aligned. Score: 10 +Sequences (2:3) Aligned. Score: 85 +Sequences (2:4) Aligned. Score: 86 +Sequences (2:5) Aligned. Score: 21 +Sequences (2:6) Aligned. Score: 20 +Sequences (2:7) Aligned. Score: 25 +Sequences (2:8) Aligned. Score: 20 +Sequences (2:9) Aligned. Score: 36 +Sequences (2:10) Aligned. Score: 16 +Sequences (2:11) Aligned. Score: 22 +Sequences (2:12) Aligned. Score: 17 +Sequences (3:4) Aligned. Score: 85 +Sequences (3:5) Aligned. Score: 13 +Sequences (3:6) Aligned. Score: 20 +Sequences (3:7) Aligned. Score: 25 +Sequences (3:8) Aligned. Score: 20 +Sequences (3:9) Aligned. Score: 36 +Sequences (3:10) Aligned. 
Score: 16 +Sequences (3:11) Aligned. Score: 18 +Sequences (3:12) Aligned. Score: 25 +Sequences (4:5) Aligned. Score: 13 +Sequences (4:6) Aligned. Score: 11 +Sequences (4:7) Aligned. Score: 20 +Sequences (4:8) Aligned. Score: 10 +Sequences (4:9) Aligned. Score: 31 +Sequences (4:10) Aligned. Score: 17 +Sequences (4:11) Aligned. Score: 29 +Sequences (4:12) Aligned. Score: 14 +Sequences (5:6) Aligned. Score: 73 +Sequences (5:7) Aligned. Score: 83 +Sequences (5:8) Aligned. Score: 80 +Sequences (5:9) Aligned. Score: 31 +Sequences (5:10) Aligned. Score: 14 +Sequences (5:11) Aligned. Score: 14 +Sequences (5:12) Aligned. Score: 12 +Sequences (6:7) Aligned. Score: 80 +Sequences (6:8) Aligned. Score: 88 +Sequences (6:9) Aligned. Score: 26 +Sequences (6:10) Aligned. Score: 16 +Sequences (6:11) Aligned. Score: 25 +Sequences (6:12) Aligned. Score: 12 +Sequences (7:8) Aligned. Score: 78 +Sequences (7:9) Aligned. Score: 31 +Sequences (7:10) Aligned. Score: 10 +Sequences (7:11) Aligned. Score: 12 +Sequences (7:12) Aligned. Score: 12 +Sequences (8:9) Aligned. Score: 31 +Sequences (8:10) Aligned. Score: 10 +Sequences (8:11) Aligned. Score: 14 +Sequences (8:12) Aligned. Score: 12 +Sequences (9:10) Aligned. Score: 63 +Sequences (9:11) Aligned. Score: 84 +Sequences (9:12) Aligned. Score: 78 +Sequences (10:11) Aligned. Score: 64 +Sequences (10:12) Aligned. Score: 76 +Sequences (11:12) Aligned. Score: 46 +Guide tree file created: [infile_copy.dnd] + +There are 11 groups +Start of Multiple Alignment + +Aligning... 
+Group 1: Sequences: 2 Score:1045 +Group 2: Sequences: 2 Score:1016 +Group 3: Sequences: 4 Score:1001 +Group 4: Sequences: 2 Score:313 +Group 5: Sequences: 2 Score:731 +Group 6: Sequences: 4 Score:516 +Group 7: Sequences: 8 Score:344 +Group 8: Sequences: 2 Score:1016 +Group 9: Sequences: 2 Score:1054 +Group 10: Sequences: 4 Score:945 +Group 11: Sequences: 12 Score:380 +Alignment Score 6283 + +CLUSTAL-Alignment file created [/share/shared/galaxy/database/files/002/dataset_2801.dat] + + +Clustal created the following dnd file for your information: +( +( +c_briggsae-chrII_+_/43862-46313:0.07349, +c_brenneri-Cbre_Contig60_+_/627772-630087:0.04317) +:0.02387, +( +c_remanei-Crem_Contig172_-_/123228-124941:0.06114, +c_elegans-II_+_/9706834-9708803:0.07219) +:0.01779, +( +( +( +c_briggsae-chrIfooI_+_/43862-46313:0.10368, +c_brenneri-Cbre_Contig60gak_+_/627772-630087:0.06298) +:0.01654, +( +c_remanei-Crem_Contig172foo_-_/123228-124941:0.05765, +c_elegans-II_+_more/9706834-9708803:0.05902) +:0.06262) +:0.31533, +( +( +c_briggsae-chrII_+_bar/43862-46313:0.02327, +c_brenneri-Cbre_Contig60fee_+_/627772-630087:0.13463) +:0.05016, +( +c_remanei-Crem_Contig172zot_-_/123228-124941:0.11667, +c_elegans-II_+_meh/9706834-9708803:0.11737) +:0.12013) +:0.20951) +:0.30133); +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jan 27 09:28:58 2014 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="clustalw2" version="2.1"> + <repository changeset_revision="09243d89e7a4" name="package_clustalw_2_1" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>