Repository 'proteinortho'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/proteinortho

Changeset 5:5532c0e5d4a6 (2023-06-16)
Previous changeset 4:85c411546123 (2022-11-22) Next changeset 6:10112d9127af (2023-10-31)
Commit message:
planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit b4d8b8da2a259973c9ad90e4b9d1a3e22ae4348f
modified:
proteinortho.xml
proteinortho_macros.xml
b
diff -r 85c411546123 -r 5532c0e5d4a6 proteinortho.xml
--- a/proteinortho.xml Tue Nov 22 16:49:50 2022 +0000
+++ b/proteinortho.xml Fri Jun 16 20:52:41 2023 +0000
[
b'@@ -2,24 +2,37 @@\n     <description>detects orthologous proteins/genes within different species</description>\n     <macros>\n         <import>proteinortho_macros.xml</import>\n-        <xml name="test_outputs">\n+        <xml name="test_output_proteinortho" tokens="nlines">\n             <output name="proteinortho">\n+                <metadata name="column_names" value="species,genes,alg.-conn.,L.fasta,C.fasta,E.fasta,M.fasta"/>\n                 <assert_contents>\n+                    <has_n_columns n="7"/>\n+                    <has_n_lines n="@NLINES@"/>\n                     <has_line_matching expression="# Species\\tGenes\\tAlg\\.-Conn\\.\\t.*"/>\n                     <has_line_matching expression="[0-9]+\\t[0-9]+\\t.*"/>\n                     <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+.*"/>\n                 </assert_contents>\n             </output>\n+        </xml>\n+        <xml name="test_output_blastgraph" tokens="nlines">\n             <output name="blastgraph">\n+                <metadata name="column_names" value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba"/>\n                 <assert_contents>\n+                    <has_n_columns n="6" comment="#"/>\n+                    <has_n_lines n="@NLINES@"/>\n                     <has_line_matching expression="# file_a\\tfile_b"/>\n                     <has_line_matching expression="# a\\tb\\tevalue_ab\\tbitscore_ab\\tevalue_ba\\tbitscore_ba"/>\n                     <has_line_matching expression="# (C|C2|E|L|M)\\.fasta\\t(C|C2|E|L|M)\\.fasta"/>\n                     <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+\\t(C|C2|E|L|M)_[0-9]+.*"/>\n                 </assert_contents>\n             </output>\n+        </xml>\n+        <xml name="test_output_proteinorthograph" tokens="nlines" token_nlines_delta="0" token_add_columns="" token_ncolumns="6">\n             <output name="proteinorthograph">\n+                <metadata name="column_names" 
value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba@ADD_COLUMNS@"/>\n                 <assert_contents>\n+                    <has_n_columns n="@NCOLUMNS@" comment="#"/>\n+                    <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/>\n                     <has_line_matching expression="# file_a\\tfile_b"/>\n                     <has_line_matching expression="# a\\tb\\tevalue_ab\\tbitscore_ab\\tevalue_ba\\tbitscore_ba(\\tsame_strand\\tsimscore)?"/>\n                     <has_line_matching expression="# (C|C2|E|L|M)\\.fasta\\t(C|C2|E|L|M)\\.fasta"/>\n@@ -44,21 +57,23 @@\n         proteinortho \n             --project=result\n             --cpus="\\${GALAXY_SLOTS:-4}"\n-            --ram="\\${GALAXY_MEMORY_MB:-16000}"\n             #if $more_options.selfblast:\n                 $more_options.selfblast\n             #end if\n             #if $more_options.singles:\n                 $more_options.singles\n             #end if\n+            #if $more_options.core:\n+                $more_options.core\n+            #end if\n             --p=$p\n-            --e=$evalue\n+            --e=$more_options.evalue\n             --conn=$conn\n             #if $more_options.cov:\n                 --cov=$more_options.cov\n             #end if\n-            #if $more_options.sim:\n-                --sim=`LC_NUMERIC=C awk "BEGIN {printf \\"%.2f\\",$more_options.sim/100}"`\n+            #if $sim:\n+                --sim=`LC_NUMERIC=C awk "BEGIN {printf \\"%.2f\\",$sim/100}"`\n             #end if\n             #if $more_options.identity:\n                 --cov=$more_options.identity\n@@ -100,15 +115,16 @@\n             <option value="blatp">BLAT (aminoacid sequences)</option>\n             <option value="blatn">BLAT (nucleotide sequences)</option>\n         </param>\n-        <param argument="--evalue" type="float" value="0.001" min="0" label="E-value threshold of the blast algorithm" help="This is the main parameter for the generation of the reciprocal best hit 
graph. Larger values results in more false positives (connections between proteins)."/>\n-        <param argument="--conn" type="float" value="0.1'..b'owing these header lines, each line corresponds to a reciprocal best hit of 2 proteins/genes (columns 1 and 2) of the announced species. The output format is shown below.\n       | *seqidA*,*seqidB* = the 2 ids/names of the proteins involved \n       | *evalue_ab* = evalue with seqidA as query and seqidB as part of the database \n       | *bitscore_ab* = bitscore with seqidA as query ...\n       | *evalue_ba* = evalue with seqidB as query ...\n-      | ...\n \n .. csv-table::\n     \n     seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba    \n+    # ecoli.faa,human.faa\n+    # 1.91e-112,357.5,1.825e-113,360\n+    L_10,C_10;test,4.32e-151,447,4.30e-151,446\n+    L_11,C_11,1.17e-68,209,3.00e-69,210\n+    L_14,C_14,3.64e-139,422,1.19e-142,431\n+    L_15,C_15,3.51e-100,303,2.12e-102,308\n+    L_16,C_16,3.75e-49,157,7.06e-50,159\n+    L_17,C_17,2.96e-195,578,5.50e-196,579\n \n ----\n \n * **orthology-groups**\n \n       | The result of the (ii) step, the clustered reciprocal best hit graph or the orthology groups.\n-      | Every line corresponds to an orthology group of proteins/genes. \n-      | The first 3 columns characterize general properties of that group: number of proteins, species and the algebraic connectivity. The higher the algebraic connectivity the more edges are there and the better the group is connected to itself in general. \n-      | Then a column for each species follows containing the proteins of that species. If a species contributes with more than one protein to a group of orthologs, then they are ordered by connectivity.\n+      | Every line corresponds to an orthology group. \n+      | The first 3 columns characterize the general properties of that group: number of proteins, species, and algebraic connectivity. 
The higher the algebraic connectivity the more edges are there and the better the group is connected to itself in general. \n+      | Then a column for each species follows containing the proteins of these species. \n+      | If a species contributes with more than one protein to a group of orthologs, then they are ordered by descending connectivity.\n+      | The \'*\' represents that this species does not contribute to the group.\n \n .. csv-table::\n     \n-    Species,Genes,Alg.-Conn.   \n+    Species,Genes,alg.-conn.,ecoli.faa,human.faa,snail.faa,wale.faa,ebola.faa\n+    5,5,0.715,C_10,C_10;test,E_10,L_10,M_10\n+    4,6,0.115,*,C_12,E_315,L_313,M_313\n+    4,5,0.167,*,C_63,E_19,L_19,M_19\n+    4,4,0.816,*,C_64,E_18,L_18,M_18\n \n ----\n \n * **orthology-pairs**\n \n-      | The same as orthology-groups but every edge is printed one-by-one here. The output is formatted the same as the RBH graph:\n+      | The same as orthology-groups but every edge is printed one-by-one instead of the whole group. The output is formatted the same as the RBH graph:\n \n .. csv-table::\n     \n@@ -273,11 +345,17 @@\n \n **Proteinortho-Tools for downstream analysis**\n \n-* `proteinortho grab proteins` : find gene(s)/protein(s) in a given fasta file and retrieve their sequence(s). You can also use a orthology-groups file.\n+* `proteinortho grab proteins` : find gene(s)/protein(s) in a given fasta file and retrieve their sequence(s). You can also use an orthology-groups file or a subset (e.g. filter by Species>10).\n * `proteinortho summary` : Summarizes the orthology-pairs/RBH files to determine how the species are connected to each other.\n \n More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho\n+\n+**Citations:**\n+\n+- Lechner, Marcus, et al. "Proteinortho: detection of (co-) orthologs in large-scale analysis." BMC bioinformatics 12.1 (2011): 1-9. (10.1186/1471-2105-12-124) \n+- Lechner, Marcus, et al. 
"Orthology detection combining clustering and synteny for very large datasets." PLoS one 9.8 (2014): e105015. (10.1371/journal.pone.0105015)\n+\n ]]>\n     </help>\n-    <expand macro="citations"/>\n+    <expand macro="citations" /> <!-- TODO: citations are not working in usegalaxy, therefore they are added manually above. -->\n </tool>\n'
b
diff -r 85c411546123 -r 5532c0e5d4a6 proteinortho_macros.xml
--- a/proteinortho_macros.xml Tue Nov 22 16:49:50 2022 +0000
+++ b/proteinortho_macros.xml Fri Jun 16 20:52:41 2023 +0000
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-   <token name="@TOOL_VERSION@">6.1.2</token>
+   <token name="@TOOL_VERSION@">6.2.3</token>
    <token name="@WRAPPER_VERSION@">1</token>
    <token name="@PROFILE@">20.09</token>
    <xml name="citations">
@@ -12,12 +12,10 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">proteinortho</requirement>
-            <!-- blast, blat, and last are not in the biopython requirements
-                 diamond is, but latest version does not work: https://gitlab.com/paulklemm_PHD/proteinortho/-/issues/55 -->
-            <requirement type="package" version="2.0.15">diamond</requirement>
+            <requirement type="package" version="2.1.4">diamond</requirement>
             <requirement type="package" version="2.13.0">blast</requirement>
             <requirement type="package" version="377">ucsc-blat</requirement>
-            <requirement type="package" version="1418">last</requirement>
+            <requirement type="package" version="1422">last</requirement>
         </requirements>
     </xml>
     <xml name="version_command">