changeset 0:b46d3df3eb9e

Initial commit
author bjoern-gruening
date Wed, 11 Jan 2012 05:57:32 -0500
parents
children 65d282ef088e
files readme.txt tool_conf.xml tools/aragorn.xml tools/tRNAscan.xml
diffstat 4 files changed, 425 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.txt	Wed Jan 11 05:57:32 2012 -0500
@@ -0,0 +1,80 @@
+Galaxy wrapper for t-RNA prediction tools
+=========================================
+
+This wrapper is copyright 2012 by Björn Grüning.
+
+This repository contains wrappers for the command line tools tRNAscan-SE and ARAGORN.
+http://lowelab.ucsc.edu/tRNAscan-SE/
+http://130.235.46.10/ARAGORN/
+
+
+Dean Laslett and Bjorn Canback
+ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences Nucl. Acids Res. (2004) 32(1): 11-16
+doi:10.1093/nar/gkh152
+
+Todd M. Lowe and Sean R. Eddy
+tRNAscan-SE: A Program for Improved Detection of Transfer RNA Genes in Genomic Sequence Nucl. Acids Res. (1997) 25(5): 955-964
+doi:10.1093/nar/25.5.0955 
+
+
+
+Installation
+============
+
+Please download tRNAscan-SE from the following URL and follow the install instructions.
+
+http://lowelab.ucsc.edu/software/tRNAscan-SE.tar.gz
+
+ARAGORN can be downloaded from:
+
+http://mbio-serv2.mbioekol.lu.se/ARAGORN/aragorn1.2.33.c
+With a recent GNU-Compiler (gcc) you can compile it with the following command.
+
+gcc -O3 -ffast-math -finline-functions -o aragorn aragorn1.2.33.c
+
+Please add the directories containing the aragorn and tRNAscan-SE executables to your PATH, for example:
+export PATH=$PATH:/home/user/bin/aragorn/bin/
+
+
+To install the wrappers, copy the files aragorn.xml and tRNAscan.xml into a trna_prediction subfolder of the
+Galaxy tools folder and modify the tool_conf.xml file to make the tools available to Galaxy.
+For example add the following lines:
+
+<tool file="trna_prediction/aragorn.xml" />
+<tool file="trna_prediction/tRNAscan.xml" />
+
+
+
+History
+=======
+
+tRNAscan:
+v0.1 - Initial public release
+
+aragorn:
+v0.1 - Initial public release
+
+
+
+
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_conf.xml	Wed Jan 11 05:57:32 2012 -0500
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<toolbox> <!-- Sample tool_conf.xml fragment that registers both tRNA prediction wrappers with Galaxy -->
+  <section name="tRNA Prediction" id="tRNA_prediction"> <!-- one section grouping the two tools -->
+    <tool file="trna_prediction/aragorn.xml" /> <!-- ARAGORN wrapper. NOTE(review): path assumes the XML was copied into a trna_prediction/ folder; this repository ships it under tools/ -->
+    <tool file="trna_prediction/tRNAscan.xml" /> <!-- tRNAscan-SE wrapper; same path assumption as above -->
+  </section>
+</toolbox>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/aragorn.xml	Wed Jan 11 05:57:32 2012 -0500
@@ -0,0 +1,124 @@
+<tool id="aragorn_trna" name="Aragorn" version="0.1"> <!-- Galaxy wrapper for the ARAGORN tRNA/tmRNA detector -->
+	<description>Prediction of tRNAs</description>
+	<command>aragorn $input > $output</command> <!-- runs aragorn with its default options; whatever it prints to stdout becomes the output dataset -->
+	<inputs>
+		<param name="input" type="data" format="fasta" label="Genome Sequence"/> <!-- FASTA dataset passed to aragorn as its only argument -->
+	</inputs>
+	<outputs>
+		<data name="output" format="txt"/> <!-- the default report is free-form text (structure drawings and frequency tables, see the help example), not tab-separated columns -->
+	</outputs>
+	<tests>
+		<test> <!-- TODO: no functional test is defined yet; it needs a FASTA fixture and the expected report -->
+		</test>
+	</tests>
+	<help>
+
+**What it does**
+
+This tool detects tRNA (and tmRNA) genes in nucleotide sequences (http://130.235.46.10/ARAGORN/).
+
+-----
+
+**Example**
+
+Suppose you have the following DNA formatted sequences::
+
+    >SQ   Sequence 8667507 BP; 1203558 A; 3121252 C; 3129638 G; 1213059 T; 0 other;
+    cccgcggagcgggtaccacatcgctgcgcgatgtgcgagcgaacacccgggctgcgcccg
+    ggtgttgcgctcccgctccgcgggagcgctggcgggacgctgcgcgtcccgctcaccaag
+    cccgcttcgcgggcttggtgacgctccgtccgctgcgcttccggagttgcggggcttcgc
+    cccgctaaccctgggcctcgcttcgctccgccttgggcctgcggcgggtccgctgcgctc
+    ccccgcctcaagggcccttccggctgcgcctccaggacccaaccgcttgcgcgggcctgg
+
+Running this tool will produce output like the following (truncated)::
+
+    87.
+    
+                 c
+                c
+               a
+             g-c
+             g-c
+             g-c
+             c-g
+             g-c
+             a-t
+             t-a     ca
+            t   tgacc  a
+     ga    a    !!!!!  g
+    t  ctcg     actgg  c
+    g  !!!!    c     tt
+    g  gagc     t
+     aa    g     g
+            c-gag
+            t-a
+            t-a
+            c-g
+            g-c
+           t   c
+           t   a
+            cac
+ 
+ 
+ 
+    tRNA-Val(cac)
+    74 bases, %GC = 58.1
+    Sequence [6669703,6669776]
+ 
+
+
+
+    tRNA Anticodon Frequency
+    AAA Phe       GAA Phe  1    CAA Leu  1    TAA Leu  1    
+    AGA Ser       GGA Ser  1    CGA Ser  2    TGA Ser  1    
+    ACA Cys       GCA Cys  2    CCA Trp  2    TCA seC       
+    ATA Tyr       GTA Tyr  1    CTA Pyl       TTA Stop      
+    AAG Leu       GAG Leu  3    CAG Leu  1    TAG Leu  2    
+    AGG Pro       GGG Pro  2    CGG Pro  2    TGG Pro  2    
+    ACG Arg  1    GCG Arg  2    CCG Arg  1    TCG Arg       
+    ATG His       GTG His  2    CTG Gln  2    TTG Gln  1    
+    AAC Val       GAC Val  3    CAC Val  2    TAC Val  1    
+    AGC Ala       GGC Ala  2    CGC Ala  3    TGC Ala  1    
+    ACC Gly       GCC Gly  5    CCC Gly  1    TCC Gly  2    
+    ATC Asp       GTC Asp  3    CTC Glu  2    TTC Glu  2    
+    AAT Ile       GAT Ile  3    CAT Met  6    TAT Ile       
+    AGT Thr       GGT Thr  2    CGT Thr  1    TGT Thr  2    
+    ACT Ser       GCT Ser  1    CCT Arg  1    TCT Arg  1    
+    ATT Asn       GTT Asn  3    CTT Lys  3    TTT Lys  2    
+    Ambiguous: 1
+
+    tRNA Codon Frequency
+    TTT Phe       TTC Phe  1    TTG Leu  1    TTA Leu  1    
+    TCT Ser       TCC Ser  1    TCG Ser  2    TCA Ser  1    
+    TGT Cys       TGC Cys  2    TGG Trp  2    TGA seC       
+    TAT Tyr       TAC Tyr  1    TAG Pyl       TAA Stop      
+    CTT Leu       CTC Leu  3    CTG Leu  1    CTA Leu  2    
+    CCT Pro       CCC Pro  2    CCG Pro  2    CCA Pro  2    
+    CGT Arg  1    CGC Arg  2    CGG Arg  1    CGA Arg       
+    CAT His       CAC His  2    CAG Gln  2    CAA Gln  1    
+    GTT Val       GTC Val  3    GTG Val  2    GTA Val  1    
+    GCT Ala       GCC Ala  2    GCG Ala  3    GCA Ala  1    
+    GGT Gly       GGC Gly  5    GGG Gly  1    GGA Gly  2    
+    GAT Asp       GAC Asp  3    GAG Glu  2    GAA Glu  2    
+    ATT Ile       ATC Ile  3    ATG Met  6    ATA Ile       
+    ACT Thr       ACC Thr  2    ACG Thr  1    ACA Thr  2    
+    AGT Ser       AGC Ser  1    AGG Arg  1    AGA Arg  1    
+    AAT Asn       AAC Asn  3    AAG Lys  3    AAA Lys  2    
+    Ambiguous: 1
+
+    Number of tRNA genes = 86
+    tRNA GC range = 50.0% to 85.1%
+    Number of tmRNA genes = 1
+
+-------
+
+**References**
+
+Dean Laslett and Bjorn Canback
+ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences Nucl. Acids Res. (2004) 32(1): 11-16
+doi:10.1093/nar/gkh152 
+
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/tRNAscan.xml	Wed Jan 11 05:57:32 2012 -0500
@@ -0,0 +1,214 @@
+<tool id="trnascan" name="tRNAscan" version="0.1"> <!-- Galaxy wrapper for tRNAscan-SE -->
+  <description>tRNA Scan</description>
+  <command >tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -d -Q -y -q -b -o $output $inputfile > /dev/null </command> <!-- user-selected flags come first, then fixed flags (see the tRNAscan-SE manual); results go to $output via -o and stdout is discarded -->
+  <inputs>
+	<param name="inputfile" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? See TIP below"/>
+	<param name="organism" type="select" format="text"> <!-- each option value is the tRNAscan-SE flag inserted verbatim into the command line -->
+		<label>Select Organism</label>
+		<option value="-G">general tRNA model</option>
+		<option value="-B">Bacterial</option>
+		<option value="-A">Archaeal</option>
+		<option value="-O">Mito/Chloroplast</option>
+	</param>
+	<param name="mode" type="select" format="text"> <!-- an empty value leaves the search mode at the program default -->
+		<label>Select Mode</label>
+		<option value=""> Default</option>
+		<option value="-C"> Cove only (very slow)</option>
+		<option value="-T"> tRNAscan only</option>
+		<option value="-E "> EufindtRNA only</option> <!-- trailing space in the value is kept on purpose so previously saved parameter values still match -->
+	</param>
+	<param name='disablePseudo' type='boolean' label='Disable pseudogene checking' truevalue='-D' falsevalue='' >	</param>
+	<param name='showPrimSecondOpt' type='boolean'  label='Show primary and secondary structure components to Cove scores' truevalue="-H" falsevalue=''></param>
+	<param name='showCodons' type='boolean' label='Show codons instead of tRNA anticodons' truevalue='-N' falsevalue=''></param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" /> <!-- tab-separated hit table written by -o (see the output example in the help) -->
+  </outputs>
+  <tests>
+	<test> <!-- default settings with the general tRNA model; fixture paths are relative to the Galaxy test-data directory (TODO confirm the files are shipped) -->
+		<param name="inputfile" ftype="fasta" value='trna/trnaTestData.fa' />
+		<param name="organism" value='general tRNA model' />
+		<param name="mode" value="" />
+		<param name='disablePseudo' value=''/>
+		<param name='showPrimSecondOpt' value="" />
+		<param name='showCodons' value='' />
+		<output name="output" file='trna/trnaTestOutput.dat'/>
+	</test>
+ </tests>
+
+<help>
+
+.. class:: warningmark
+
+**TIP** This tool requires *fasta* format.
+
+-----
+
+**What it does**
+
+	tRNAscan-SE was designed to make rapid, sensitive searches of genomic
+	sequence feasible using the selectivity of the Cove analysis package.
+	We have optimized search sensitivity with eukaryote cytoplasmic and
+	eubacterial sequences, but it may be applied more broadly with a
+	slight reduction in sensitivity.
+	http://lowelab.ucsc.edu/tRNAscan-SE/
+
+-----
+
+**Organism**
+
+- use general tRNA model:
+
+	This option selects the general tRNA covariance model that was trained
+	on tRNAs from all three phylogenetic domains (archaea, bacteria, and
+	eukarya).  This mode can be used when analyzing a mixed collection of
+	sequences from more than one phylogenetic domain, with only slight
+	loss of sensitivity and selectivity.  The original publication
+	describing this program and tRNAscan-SE version 1.0 used this general
+	tRNA model exclusively.  If you wish to compare scores to those found
+	in the paper or scans using v1.0, use this option.  Use of this option
+	is compatible with all other search mode options described in this
+	section.
+
+- search for bacterial tRNAs
+
+	This option selects the bacterial covariance model for tRNA analysis,
+	and loosens the search parameters for EufindtRNA to improve detection
+	of bacterial tRNAs.  Use of this mode with bacterial sequences
+	will also improve bounds prediction of the 3' end (the terminal CCA
+	triplet).
+
+- search for archaeal tRNAs
+
+	This option selects an archaeal-specific covariance model for tRNA
+	analysis, as well as slightly loosening the EufindtRNA search
+	cutoffs.
+
+- search for organellar (mitochondrial/chloroplast) tRNAs
+
+	This parameter bypasses the fast first-pass scanners that are poor at
+	detecting organellar tRNAs and runs Cove analysis only.  Since true
+	organellar tRNAs have been found to have Cove scores between 15 and 20
+	bits, the search cutoff is lowered from 20 to 15 bits.  Also,
+	pseudogene checking is disabled since it is only applicable to
+	eukaryotic cytoplasmic tRNA pseudogenes.  Since Cove-only mode is
+	used, searches will be very slow (see -C option below) relative to the
+	default mode.
+
+------
+
+**Mode**
+
+- search using Cove analysis only (max sensitivity, slow)
+
+	Directs tRNAscan-SE to analyze sequences using Cove analysis only.
+	This option allows a slightly more sensitive search than the default
+	tRNAscan + EufindtRNA -> Cove mode, but is much slower (by approx. 250
+	to 3,000 fold).  Output format and other program defaults are
+	otherwise identical to the normal analysis.
+
+- search using Eukaryotic tRNA finder (EufindtRNA) only:
+
+	This option runs EufindtRNA alone to search for tRNAs.  Since Cove is
+	not being used as a secondary filter to remove false positives, this
+	run mode defaults to "Normal" parameters which more closely
+	approximates the sensitivity and selectivity of the original algorithm
+	described by Pavesi and colleagues.
+
+- search using tRNAscan only (defaults to strict search params)
+
+	Directs tRNAscan-SE to use only tRNAscan to analyze sequences.  This
+	mode will cause tRNAscan to default to using "strict" parameters
+	(similar to tRNAscan version 1.3 operation).  This mode of operation
+	is faster (about 3-5 times faster than default mode analysis), but
+	will result in approximately 0.2 to 0.6 false positive tRNAs per Mbp,
+	decreased sensitivity, and less reliable prediction of anticodons,
+	tRNA isotype, and introns.
+
+-----
+
+**disable pseudogene checking**
+
+	Manually disable checking tRNAs for poor primary or secondary
+	structure scores often indicative of eukaryotic pseudogenes.  This
+	will slightly speed the program and may be necessary for non-eukaryotic
+	sequences that are flagged as possible pseudogenes but are known to be
+	functional tRNAs.
+
+-----
+
+**Show both primary and secondary structure score components to covariance model bit scores**
+
+	This option displays the breakdown of the two components of the
+	covariance model bit score.  Since tRNA pseudogenes often have one
+	very low component (good secondary structure but poor primary sequence
+	similarity to the tRNA model, or vice versa), this information may be
+	useful in deciding whether a low-scoring tRNA is likely to be a
+	pseudogene.  The heuristic pseudogene detection filter uses this
+	information to flag possible pseudogenes -- use this option to see why
+	a hit is marked as a possible pseudogene.  The user may wish to
+	examine score breakdowns from known tRNAs in the organism of interest 
+	to get a frame of reference.
+
+-----
+
+**Show codons instead of tRNA anticodons**
+
+	This option causes tRNAscan-SE to output a tRNA's corresponding codon
+	in place of its anticodon.
+
+-----
+
+**Example**
+
+* input::
+
+	-Genome Sequence
+
+	>CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
+	GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
+	GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
+	TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
+	TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
+	GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
+	ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
+	AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
+	CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
+	TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
+	AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
+	GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
+	AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
+	CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
+	AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
+	GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
+	.....
+	- organism : Mixed (general tRNA model)
+	- mode : Default
+	- disable pseudogene checking : not checked
+	- Show both primary and secondary structure score components to covariance model bit scores : not checked
+	- Show codons instead of tRNA anticodons : not checked
+
+* output::
+
+	Sequence 		tRNA 	Bounds	tRNA	Anti	Intron Bounds	Cove	Hit
+	Name     	tRNA #	Begin	End  	Type	Codon	Begin	End	Score	Origin
+	-------- 	------	---- 	------	----	-----	-----	----	------	------
+	CELF22B7 	1	12619	12738	Leu	CAA	12657	12692	55.12	Bo
+	CELF22B7 	2	19480	19561	Ser	AGA	0	0	66.90	Bo
+	CELF22B7 	3	26367	26439	Phe	GAA	0	0	73.88	Bo
+	CELF22B7 	4	26992	26920	Phe	GAA	0	0	73.88	Bo
+	CELF22B7 	5	23765	23694	Pro	CGG	0	0	60.58	Bo
+
+
+
+-------
+
+**References**
+
+Todd M. Lowe and Sean R. Eddy
+tRNAscan-SE: A Program for Improved Detection of Transfer RNA Genes in Genomic Sequence Nucl. Acids Res. (1997) 25(5): 955-964
+doi:10.1093/nar/25.5.0955 
+
+ </help>
+
+</tool>