diff gstf_preparation.xml @ 10:e8e75a79de59 draft

"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 9c8611fee927883f50bc6955771aa69df1ce8457"
author earlhaminst
date Thu, 31 Oct 2019 08:16:51 -0400
parents 92f3966d5bc3
children dbe37a658cd2
line wrap: on
line diff
--- a/gstf_preparation.xml	Wed Oct 17 07:31:29 2018 -0400
+++ b/gstf_preparation.xml	Thu Oct 31 08:16:51 2019 -0400
@@ -1,7 +1,6 @@
 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1">
     <description>converts data for the workflow</description>
-    <command detect_errors="exit_code">
-<![CDATA[
+    <command detect_errors="exit_code"><![CDATA[
 python '$__tool_directory__/gstf_preparation.py'
 #for $q in $queries
     --gff3 '${q.genome}:${q.gff3_input}'
@@ -22,12 +21,11 @@
 #end if
 #if $regions
     --regions '$regions'
+    --ff '$filtered_fasta'
 #end if
 -o '$output_db'
 --of '$output_fasta'
---ff '$filtered_fasta'
-]]>
-    </command>
+    ]]></command>
 
     <inputs>
         <repeat name="queries" title="GFF3 dataset">
@@ -40,58 +38,56 @@
         <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" />
         <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" />
         <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the &gt;TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" />
-        <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" />
+        <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered out" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter out chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" />
     </inputs>
 
     <outputs>
-         <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
-         <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
-         <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences" />
+        <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
+        <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
+        <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences">
+            <filter>regions</filter>
+        </data>
     </outputs>
 
     <tests>
-        <test>
+        <test expect_num_outputs="2">
             <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
             <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
             <param name="genome" value="caenorhabditis_elegans" />
             <param name="longestCDS" value="false" />
             <param name="headers" value="true" />
 
-            <output name="output_db" file="test1.sqlite" compare="sim_size" />
+            <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
             <output name="output_fasta" file="test1.fasta" />
-            <output name="filtered_fasta" file="test1.ns.fasta" />
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
             <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
             <param name="genome" value="caenorhabditis_elegans" />
             <param name="longestCDS" value="true" />
             <param name="headers" value="true" />
 
-            <output name="output_db" file="test1.sqlite" compare="sim_size" />
+            <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
             <output name="output_fasta" file="test1_longest.fasta" />
-            <output name="filtered_fasta" file="test1.ns.fasta" />
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
             <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
             <param name="genome" value="caenorhabditis_elegans" />
             <param name="longestCDS" value="false" />
             <param name="headers" value="false" />
 
-            <output name="output_db" file="test1.sqlite" compare="sim_size" />
+            <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
             <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
-            <output name="filtered_fasta" file="test1.ns.fasta" />
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
             <param name="json" ftype="json" value="gene.json" />
             <param name="longestCDS" value="false" />
             <param name="headers" value="true" />
 
-            <output name="output_db" file="test4.sqlite" compare="sim_size" />
+            <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" />
             <output name="output_fasta" file="test4.fasta" />
-            <output name="filtered_fasta" file="test4.ns.fasta" />
         </test>
         <test>
             <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
@@ -100,13 +96,22 @@
             <param name="headers" value="true" />
             <param name="regions" value="X" />
 
-            <output name="output_db" file="test5.sqlite" compare="sim_size" />
+            <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" />
             <output name="output_fasta" file="test5_filtered.fasta" />
             <output name="filtered_fasta" file="test5.ns.fasta" />
         </test>
+        <test expect_num_outputs="2">
+            <param name="fasta_inputs" ftype="fasta" value="Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa" />
+            <param name="gff3_input" ftype="gff3" value="MGP_PahariEiJ_G0008413.1.gff3" />
+            <param name="genome" value="mus_pahari" />
+            <param name="longestCDS" value="true" />
+            <param name="headers" value="true" />
+
+            <output name="output_db" file="test6.sqlite" compare="sim_size" delta="30000" />
+            <output name="output_fasta" file="test6.fasta" />
+        </test>
     </tests>
-    <help>
-<![CDATA[
+    <help><![CDATA[
 **What it does**
 
 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format.
@@ -140,8 +145,7 @@
 .. class:: warningmark
 
 If a value in the **ID** or **Parent** attribute contains a colon, everything up to the first colon will be discarded.
-]]>
-    </help>
+    ]]></help>
     <citations>
     </citations>
 </tool>