Mercurial > repos > peterjc > get_orfs_or_cdss

diff tools/get_orfs_or_cdss/get_orfs_or_cdss.xml @ 7:705a2e2df7fb draft
v0.1.1 fix typo; v0.1.0 BED output (Eric Rasche), NCBI genetic code 24; v0.0.7 embeds citation
author: peterjc
date: Thu, 30 Jul 2015 12:35:31 -0400
parents: 5208c15805ec
children: 09a8be9247ca
--- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml	Thu Nov 21 10:47:53 2013 -0500
+++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml	Thu Jul 30 12:35:31 2015 -0400
@@ -1,18 +1,18 @@
-<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.0.5">
+<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.1.1">
     <description>e.g. to get peptides from ESTs</description>
     <requirements>
-        <requirement type="package" version="1.62">biopython</requirement>
+        <requirement type="package" version="1.65">biopython</requirement>
         <requirement type="python-module">Bio</requirement>
     </requirements>
-    <version_command interpreter="python">get_orfs_or_cdss.py --version</version_command>
-    <command interpreter="python">
-get_orfs_or_cdss.py $input_file $input_file.ext $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file
-    </command>
     <stdio>
         <!-- Anything other than zero is an error -->
         <exit_code range="1:" />
         <exit_code range=":-1" />
     </stdio>
+    <version_command interpreter="python">get_orfs_or_cdss.py --version</version_command>
+    <command interpreter="python">
+get_orfs_or_cdss.py -i $input_file -f $input_file.ext --table $table -t $ftype -e $ends -m $mode --min_len $min_len -s $strand --on $out_nuc_file --op $out_prot_file --ob $out_bed_file
+    </command>
     <inputs>
         <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file (nucleotides)" help="FASTA, FASTQ, or SFF format." />
         <param name="table" type="select" label="Genetic code" help="Tables from the NCBI, these determine the start and stop codons">
@@ -33,6 +33,7 @@
             <option value="21">21. Trematode Mitochondrial</option>
             <option value="22">22. Scenedesmus obliquus</option>
             <option value="23">23. Thraustochytrium Mitochondrial</option>
+            <option value="24">24. Pterobranchia Mitochondrial</option>
         </param>
         <param name="ftype" type="select" value="True" label="Look for ORFs or CDSs">
             <option value="ORF">Look for ORFs (check for stop codons only, ignore start codons)</option>
@@ -49,7 +50,7 @@
             <option value="one">First ORF/CDS from each sequence with the maximum length</option>
         </param>
         <param name="min_len" type="integer" size="5" value="30" label="Minimum length ORF/CDS (in amino acids, e.g. 30 aa = 90 bp plus any stop codon)" />
-        <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing.">
+        <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing).">
             <option value="both">Search both the forward and reverse strand</option>
             <option value="forward">Only search the forward strand</option>
             <option value="reverse">Only search the reverse strand</option>
@@ -58,6 +59,7 @@
     <outputs>
         <data name="out_nuc_file" format="fasta" label="${ftype.value}s (nucleotides)" />
         <data name="out_prot_file" format="fasta" label="${ftype.value}s (amino acids)" />
+        <data name="out_bed_file" format="bed6" label="${ftype.value}s (bed)" />
     </outputs>
     <tests>
         <test>
@@ -70,6 +72,7 @@
             <param name="strand" value="forward" />
             <output name="out_nuc_file" file="get_orf_input.t1_nuc_out.fasta" />
             <output name="out_prot_file" file="get_orf_input.t1_prot_out.fasta" />
+            <output name="out_bed_file" file="get_orf_input.t1_bed_out.bed" />
         </test>
         <test>
             <param name="input_file" value="get_orf_input.fasta" />
@@ -80,7 +83,8 @@
             <param name="min_len" value="10" />
             <param name="strand" value="forward" />
             <output name="out_nuc_file" file="get_orf_input.t11_nuc_out.fasta" />
-            <output    name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" />
+            <output name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" />
+            <output name="out_bed_file" file="get_orf_input.t11_bed_out.bed" />
         </test>
         <test>
             <param name="input_file" value="get_orf_input.fasta" />
@@ -92,6 +96,7 @@
             <param name="strand" value="forward" />
             <output name="out_nuc_file" file="get_orf_input.t11_open_nuc_out.fasta" />
             <output name="out_prot_file" file="get_orf_input.t11_open_prot_out.fasta" />
+            <output name="out_bed_file" file="get_orf_input.t11_open_bed_out.bed" />
         </test>
         <test>
             <param name="input_file" value="Ssuis.fasta" />
@@ -103,6 +108,7 @@
             <param name="strand" value="both" />
             <output name="out_nuc_file" file="get_orf_input.Suis_ORF.nuc.fasta" />
             <output name="out_prot_file" file="get_orf_input.Suis_ORF.prot.fasta" />
+            <output name="out_bed_file" file="get_orf_input.Suis_ORF.bed" />
         </test>
     </tests>
     <help>
@@ -134,7 +140,7 @@
 potential start codon will be used, giving the longest possible CDS within
 each ORF, and thus the longest possible protein sequence. This is useful
 for things like BLAST or domain searching, but since this may not be the
-correct start codon may not be appropriate for signal peptide detection
+correct start codon, it may not be appropriate for signal peptide detection
 etc.
 
 **Example Usage**
@@ -145,7 +151,7 @@
 encode one protein as a single ORF/CDS, which you wish to extract (and
 perhaps translate into amino acids).
 
-If your RNS-Seq data was strand specific, and assembled taking this into
+If your RNA-Seq data was strand specific, and assembled taking this into
 account, you should only search for ORFs/CDSs on the forward strand.
 
 **Citation**
@@ -168,4 +174,8 @@
 This tool is available to install into other Galaxy Instances via the Galaxy
 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/get_orfs_or_cdss
     </help>
+    <citations>
+        <citation type="doi">10.7717/peerj.167</citation>
+        <citation type="doi">10.1093/bioinformatics/btp163</citation>
+    </citations>
 </tool>
author	peterjc
date	Thu, 30 Jul 2015 12:35:31 -0400
parents	5208c15805ec
children	09a8be9247ca