diff gff3_extract_sequence.xml @ 4:34b80e483fb8 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:43:58 +0000
parents
children 759ba1430091
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_extract_sequence.xml	Mon Jun 05 02:43:58 2023 +0000
@@ -0,0 +1,69 @@
+<tool id="edu.tamu.cpt.gff3.export_seq" name="GFF3 Feature Sequence Export" version="19.1.0.0">
+    <description>Export corresponding sequence in genome from GFF3</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+@GENOME_SELECTOR_PRE@
+
+'$__tool_directory__/gff3_extract_sequence.py'
+@GENOME_SELECTOR@
+
+@INPUT_GFF@
+
+#if $feature_filter and $feature_filter is not None:
+--feature_filter '$feature_filter'
+#end if
+'$nodesc'
+> '$default']]></command>
+    <inputs>
+        <expand macro="genome_selector"/>
+        <expand macro="gff3_input"/>
+        <param label="Filter for specific feature types" name="feature_filter" type="text" help="Use 'nice_cds' if your features are coming from Apollo, however this will fail on non-Apollo data"/>
+        <param label="Remove description (use if blasting)" name="nodesc" type="boolean" truevalue="--nodesc" falsevalue=""/>
+    </inputs>
+    <outputs>
+        <data format="fasta" hidden="false" name="default"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="T7_ExtSeqIn.fasta"/>
+            <param name="gff3_data" value="T7_ExtSeqIn.gff3"/>
+            <param name="nodesc" value=""/>
+            <param name="feature_filter" value="CDS"/>
+            <output name="output" file="T7_ExtSeqOut.fasta"/>
+        </test>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="Miro_ExtSeqIn.fa"/>
+            <param name="gff3_data" value="Miro_ExtSeqIn.gff3"/>
+            <param name="nodesc" value=""/>
+            <param name="feature_filter" value="CDS"/>
+            <output name="output" file="Miro_ExtSeqOut1.fa"/>
+        </test>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="Miro_ExtSeqIn.fa"/>
+            <param name="gff3_data" value="Miro_ExtSeqIn.gff3"/>
+            <param name="feature_filter" value="CDS"/>
+            <param name="nodesc" value="--nodesc"/>
+            <output name="output" file="Miro_ExtSeqOut2.fa"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+From the FASTA sequence for a genome, extracts the *nucleotide* sequences for 
+all CDSs in an input GFF3 and outputs them as a multi-FASTA formatted file.
+
+The filter for specific feature types was designed for data retrieved from Apollo. 
+Using ‘unique_cds’ extracts the sequence for all CDS. Using ‘nice_cds’ will extract 
+the sequence only for CDS features with a start codon (recommended). If a gene from
+Apollo has multiple CDSs, the tool will append a "_##" to the end of subsequent genes.
+
+      ]]></help>
+    <expand macro="citations"/>
+</tool>