diff proteinortho.xml @ 7:c5dd4f86d981 draft

planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 5eba1fb52a5ec1e63ca126be42062323e1a76687
author iuc
date Tue, 23 Jan 2024 12:21:52 +0000
parents 10112d9127af
children 6140163233a5
line wrap: on
line diff
--- a/proteinortho.xml	Tue Oct 31 16:32:03 2023 +0000
+++ b/proteinortho.xml	Tue Jan 23 12:21:52 2024 +0000
@@ -2,24 +2,24 @@
     <description>detects orthologous proteins/genes within different species</description>
     <macros>
         <import>proteinortho_macros.xml</import>
-        <xml name="test_output_proteinortho" tokens="nlines">
+        <xml name="test_output_proteinortho" tokens="nlines" token_nlines_delta="0"> <!-- Shared assertions for the "proteinortho" output. Token nlines (no default declared here) fills has_n_lines n; token nlines_delta (declared with value "0") fills has_n_lines delta. -->
             <output name="proteinortho">
                 <metadata name="column_names" value="species,genes,alg.-conn.,L.fasta,C.fasta,E.fasta,M.fasta"/>
                 <assert_contents>
                     <has_n_columns n="7"/>
-                    <has_n_lines n="@NLINES@"/>
+                    <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/>
                     <has_line_matching expression="# Species\tGenes\tAlg\.-Conn\.\t.*"/>
                     <has_line_matching expression="[0-9]+\t[0-9]+\t.*"/>
                     <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+.*"/>
                 </assert_contents>
             </output>
         </xml>
-        <xml name="test_output_blastgraph" tokens="nlines">
+        <xml name="test_output_blastgraph" tokens="nlines" token_nlines_delta="0">
             <output name="blastgraph">
                 <metadata name="column_names" value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba"/>
                 <assert_contents>
                     <has_n_columns n="6" comment="#"/>
-                    <has_n_lines n="@NLINES@"/>
+                    <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/>
                     <has_line_matching expression="# file_a\tfile_b"/>
                     <has_line_matching expression="# a\tb\tevalue_ab\tbitscore_ab\tevalue_ba\tbitscore_ba"/>
                     <has_line_matching expression="# (C|C2|E|L|M)\.fasta\t(C|C2|E|L|M)\.fasta"/>
@@ -97,6 +97,10 @@
                 #end for#
             #end if
             2> >(sed -E "s/.\[([0-9]{1,2}(;[0-9]{1,2})?)?[mGK]//g" 1>&2)
+        #if $more_options.selfblast:
+            ## Rename the cleaned blast graph. No trailing ';' here: the next step is joined with '&&', and '; &&' is a shell syntax error.
+            && mv result.blast-graph_clean result.blast-graph
+        #end if
         #if $synteny.synteny_options == "specified":
             &&
             mv result.poff-graph result.proteinortho-graph &&
@@ -176,9 +180,9 @@
         <test expect_num_outputs="3"> <!-- test normal -->
             <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
             <param name="p" value="diamond"/>
-            <expand macro="test_output_proteinortho" nlines="34"/>
-            <expand macro="test_output_blastgraph" nlines="157"/>
-            <expand macro="test_output_proteinorthograph" nlines="134"/>
+            <expand macro="test_output_proteinortho" nlines="33" nlines_delta="5"/>
+            <expand macro="test_output_blastgraph" nlines="156" nlines_delta="20"/>
+            <expand macro="test_output_proteinorthograph" nlines="139" nlines_delta="20"/>
             <assert_command>
                 <has_text text="--p=diamond"/>
             </assert_command>
@@ -191,13 +195,12 @@
             <section name="more_options">
                 <param name="cov" value="42"/>
                 <param name="identity" value="42"/>
-                <param name="selfblast" value="true"/>
                 <param name="singles" value="true"/>
                 <param name="core" value="true"/>
             </section>
-            <expand macro="test_output_proteinortho" nlines="177"/>
-            <expand macro="test_output_blastgraph" nlines="2720"/>
-            <expand macro="test_output_proteinorthograph" nlines="384"/>
+            <expand macro="test_output_proteinortho" nlines="151" nlines_delta="50"/>
+            <expand macro="test_output_blastgraph" nlines="1403" nlines_delta="300"/>
+            <expand macro="test_output_proteinorthograph" nlines="239" nlines_delta="150"/>
             <assert_command>
                 <has_text text="--p=diamond"/>
             </assert_command>
@@ -209,8 +212,8 @@
             <conditional name="synteny">
                 <param name="synteny_options" value="specified"/>
             </conditional>
-            <expand macro="test_output_proteinortho" nlines="38"/>
-            <expand macro="test_output_blastgraph" nlines="157"/>
+            <expand macro="test_output_proteinortho" nlines="38" nlines_delta="20"/>
+            <expand macro="test_output_blastgraph" nlines="300" nlines_delta="150"/>
             <expand macro="test_output_proteinorthograph" nlines="119" nlines_delta="10" ncolumns="8" add_columns=",same_strand,simscore"/>
             <assert_command>
                 <has_text text="--p=diamond"/>
@@ -219,9 +222,9 @@
         <test expect_num_outputs="3"> <!-- blast -->
             <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
             <param name="p" value="blastp"/>
-            <expand macro="test_output_proteinortho" nlines="32"/>
-            <expand macro="test_output_blastgraph" nlines="158"/>
-            <expand macro="test_output_proteinorthograph" nlines="142"/>
+            <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/>
+            <expand macro="test_output_blastgraph" nlines="155" nlines_delta="50"/>
+            <expand macro="test_output_proteinorthograph" nlines="139" nlines_delta="50"/>
             <assert_command>
                 <has_text text="--p=blastp"/>
             </assert_command>
@@ -229,9 +232,9 @@
         <test expect_num_outputs="3"> <!-- auto blast -->
             <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
             <param name="p" value="autoblast"/>
-            <expand macro="test_output_proteinortho" nlines="32"/>
-            <expand macro="test_output_blastgraph" nlines="158"/>
-            <expand macro="test_output_proteinorthograph" nlines="142"/>
+            <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/>
+            <expand macro="test_output_blastgraph" nlines="157" nlines_delta="50"/>
+            <expand macro="test_output_proteinorthograph" nlines="136" nlines_delta="50"/>
             <assert_command>
                 <has_text text="--p=autoblast"/>
             </assert_command>
@@ -239,9 +242,9 @@
         <test expect_num_outputs="3"> <!-- last -->
             <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
             <param name="p" value="lastp"/>
-            <expand macro="test_output_proteinortho" nlines="34"/>
-            <expand macro="test_output_blastgraph" nlines="148"/>
-            <expand macro="test_output_proteinorthograph" nlines="133"/>
+            <expand macro="test_output_proteinortho" nlines="34" nlines_delta="20"/>
+            <expand macro="test_output_blastgraph" nlines="148" nlines_delta="50"/>
+            <expand macro="test_output_proteinorthograph" nlines="134" nlines_delta="50"/>
             <assert_command>
                 <has_text text="--p=lastp"/>
             </assert_command>
@@ -249,9 +252,9 @@
         <test expect_num_outputs="3"> <!-- blat -->
             <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
             <param name="p" value="blastp"/>
-            <expand macro="test_output_proteinortho" nlines="32"/>
-            <expand macro="test_output_blastgraph" nlines="158"/>
-            <expand macro="test_output_proteinorthograph" nlines="142"/>
+            <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/>
+            <expand macro="test_output_blastgraph" nlines="156" nlines_delta="50"/>
+            <expand macro="test_output_proteinorthograph" nlines="136" nlines_delta="50"/>
             <assert_command>
                 <has_text text="--p=blastp"/>
             </assert_command>
@@ -265,7 +268,7 @@
 
   | It compares similarities of given gene/protein sequences and clusters them to find significant groups.
   | The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once.
-  | Details can be found in (doi:10.1186/1471-2105-12-124).
+  | Details can be found in (doi:10.1186/1471-2105-12-124 and doi:10.3389/fbinf.2023.1322477).
   | To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (details see doi:10.1371/journal.pone.0105015), is already built in Proteinortho. 
 
 ----
@@ -353,9 +356,6 @@
 
 **Citations:**
 
-- Lechner, Marcus, et al. "Proteinortho: detection of (co-) orthologs in large-scale analysis." BMC bioinformatics 12.1 (2011): 1-9. (10.1186/1471-2105-12-124) 
-- Lechner, Marcus, et al. "Orthology detection combining clustering and synteny for very large datasets." PLoS one 9.8 (2014): e105015. (10.1371/journal.pone.0105015)
-
 ]]>
     </help>
     <expand macro="citations" /> <!-- TODO: citations are not working in usegalaxy; the manual list that used to sit in the help text above has been removed, so verify that this macro renders them. -->