diff cherry_pick_fasta.xml @ 3:c282a8a47dd9 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
author artbio
date Fri, 21 May 2021 09:34:14 +0000
parents 321cad0eb507
children ba6c4aeb22ea
line wrap: on
line diff
--- a/cherry_pick_fasta.xml	Tue Mar 16 23:25:57 2021 +0000
+++ b/cherry_pick_fasta.xml	Fri May 21 09:34:14 2021 +0000
@@ -1,15 +1,23 @@
-<tool id="cherry_pick_fasta" name="Pick Fasta sequences" version="2.1.0">
+<tool id="cherry_pick_fasta" name="Pick Fasta sequences" version="3.0.0">
   <description>with header satisfying a string query</description>
   <requirements>
-      <requirement type="package" version="3.7.6">python</requirement>
+        <requirement type="package" version="1.70">biopython</requirement>
   </requirements>
   <command interpreter="python">cherry_pick_fasta.py
                                    --input $input
                                    --searchfor '$search.searchfor'
                                    #if $search.options_selector == 'single':
-                                       --query-string '$search.query'
+                                       #if $search.match == 'exact':
+                                           --query-string '$search.query' --mode exact
+                                       #else:
+                                           --query-string '$search.query' --mode includes
+                                       #end if
                                    #else:
-                                       --query-file '$search.query'
+                                       #if $search.match == 'exact':
+                                           --query-file '$search.query' --mode exact
+                                       #else:
+                                           --query-file '$search.query' --mode includes
+                                       #end if
                                    #end if
                                    --output $output
   </command>
@@ -18,11 +26,19 @@
     <param name="input" type="data" format="fasta" label="Source file" help="Fasta file to parse" />
 
     <conditional name="search">
-        <param name="options_selector" type="select" display="radio" label="by single term or file of terms">
-            <option value="single" selected="True">single term</option>
-            <option value="textdataset">terms in a text dataset</option>
+        <param name="options_selector" type="select" display="radio" label="for a">
+            <option value="single" selected="True">single string</option>
+            <option value="textdataset">list of strings</option>
         </param>
         <when value="single">
+            <param name="match" type="select" label="retrieve sequences whose headers...">
+                <option value="includes" selected="true">partially</option>
+                <option value="exact">exactly</option>
+            </param> <!-- option values mirror the mode arguments (includes / exact) used in the command block -->
+            <param name="searchfor" type="select" label=" ">
+                <option value="with" selected="true">contain this string</option>
+                <option value="without">do not contain this string</option>
+            </param>
             <param name="query" type="text" size="30" value="" label="Search string" help="exemple: gi|40557596">
                 <sanitizer>
                     <valid initial="string.printable">
@@ -35,17 +51,17 @@
                     </mapping>
                 </sanitizer>
             </param>
-            <param name="searchfor" type="select" label="retrieve sequences whose headers contain or do not contain the search string">
-                <option value="with" selected="true">contain</option>
-                <option value="without">do not contain</option>
-            </param>
         </when>
         <when value="textdataset">
-            <param name="query" type="data" format="txt" label="term dataset" help="a list of term to search for, one term per line" />
-            <param name="searchfor" type="select" label="retrieve sequences whose headers contain or do not contain the search list">
-                <option value="with" selected="true">contain</option>
-                <option value="without">do not contain</option>
+            <param name="match" type="select"  label="retrieve sequences whose headers...">
+                <option value="includes" selected="true">partially</option>
+                <option value="exact">exactly</option>
             </param>
+            <param name="searchfor" type="select" label=" ">
+                <option value="with" selected="true">contain one of these list strings</option>
+                <option value="without">do not contain one of these list strings</option>
+            </param>
+            <param name="query" type="data" format="txt" label="list of strings dataset" help="a list of strings to search for, one string per line" />
         </when>
     </conditional>
   </inputs>
@@ -53,16 +69,50 @@
     <data name="output" format="fasta" label="Fasta sequences ${search.searchfor.value} ${search.options_selector} term(s) in header" />
   </outputs>
   <tests>
+    <!-- exact matches -->
+    <test>
+        <param ftype="fasta" name="input" value="input.fa" />
+        <param name="query" value="gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_1/2_Confidence_0.667_Length_1344_hit1_IdMatch=43.46,AligLength=451,E-val=2e-122" />
+        <param name="searchfor" value="without" />
+        <param name="match" value="exact" />
+        <output name="output" ftype="fasta" file="output_exactly_not.fa" />
+    </test>
+    <test>
+        <param ftype="fasta" name="input" value="input.fa" />
+        <param name="query" value="gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_1/2_Confidence_0.667_Length_1344_hit1_IdMatch=43.46,AligLength=451,E-val=2e-122" />
+        <param name="searchfor" value="with" />
+        <param name="match" value="exact" />
+        <output name="output" ftype="fasta" file="output_exact.fa" />
+    </test>
+
+
     <test>
         <param ftype="fasta" name="input" value="input.fa" />
-        <!-- <param name="options_selector" value="textdataset" /> -->
+        <param name="options_selector" value="textdataset" />
+        <param name="query" ftype="txt" value="alt_termlist.txt" />
+        <param name="searchfor" value="without" />
+        <param name="match" value="exact" />
+        <output name="output" ftype="fasta" file="output_alt_termlist_without.fa" />
+    </test>
+    <test>
+        <param ftype="fasta" name="input" value="input.fa" />
+        <param name="options_selector" value="textdataset" />
+        <param name="query" ftype="txt" value="alt_termlist.txt" />
+        <param name="searchfor" value="with" />
+        <param name="match" value="exact" />
+        <output name="output" ftype="fasta" file="output_alt_termlist.fa" />
+    </test>
+
+
+    <!-- partial matches -->
+    <test>
+        <param ftype="fasta" name="input" value="input.fa" />
         <param name="query" value="gi|81971654" />
         <param name="searchfor" value="with" />
         <output name="output" ftype="fasta" file="output.fa" />
     </test>
     <test>
         <param ftype="fasta" name="input" value="input.fa" />
-        <!-- <param name="options_selector" value="textdataset" /> -->
         <param name="query" value="RNA" />
         <param name="searchfor" value="without" />
         <output name="output" ftype="fasta" file="output_without.fa" />
@@ -82,17 +132,11 @@
         <output name="output" ftype="fasta" file="output_termlist_without.fa" />
     </test>
   </tests>
-
   <help>
 **What it does**
 
 This tool retrieves nucleotide/peptide sequences from a fasta file whose headers match
-or do not match a given string.
-
-It is Copyright © 2019 `CNRS and Sorbonne-Université`_ and is released under the `MIT license`_.
-
-.. _CNRS and  Sorbonne-Université: http://www.sorbonne-universite.fr/en
-.. _MIT license: http://opensource.org/licenses/MIT
+or do not match a given string, or a list of strings.
 
   </help>
 </tool>