diff proteinortho_summary.xml @ 0:2e0e9c418a85 draft

"planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 889335c0a31f156c3f90d4c2048cb4df155a53b2"
author iuc
date Tue, 18 Feb 2020 17:56:58 -0500
parents
children c3f58c2eee1e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/proteinortho_summary.xml	Tue Feb 18 17:56:58 2020 -0500
@@ -0,0 +1,98 @@
+<tool id="proteinortho_summary" name="Proteinortho summary" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@">
+    <description>summarizes the orthology-pairs/RBH files</description>
+    <macros>
+        <import>proteinortho_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+        proteinortho_summary.pl
+            '$queryfile'
+            #if $queryfile2:
+                '$queryfile2'
+            #end if
+            2>&1 | awk '/^$/ && !f{f=1;next}1' | awk -v RS= '{print > ("output" NR ".tsv")}'
+        &&
+        mv output1.tsv adjacencyMat.tsv &&
+        mv output2.tsv average1paths.tsv &&
+        mv output3.tsv adjacencyMatSquared.tsv &&
+        mv output4.tsv average2paths.tsv
+    ]]></command> <!-- Both dataset paths are single-quoted for the shell. The first awk drops the first empty line of the merged stdout/stderr stream; the second awk (RS= paragraph mode) writes each blank-line-separated record to outputN.tsv, and the mv chain renames records 1-4 to the file names used by from_work_dir. -->
+    <inputs> <!-- queryfile is required; queryfile2 is optional and is only passed to the command when it is set -->
+        <param name="queryfile" type="data" format="tabular" label="An orthology-pairs / RBH file"/>
+        <param name="queryfile2" type="data" format="tabular" optional="true" label="(optional) A second orthology-pairs / RBH file" help="If you provide a second file, then the difference is calculated (GRAPH - second GRAPH)"/>
+    </inputs>
+    <outputs> <!-- four tabular datasets; each one is taken from the working-directory file named in its from_work_dir attribute, which the command section creates with mv -->
+        <data name="adjacencyMat" format="tabular" label="${tool.name} on ${on_string}: Adjacency Matrix" from_work_dir="adjacencyMat.tsv"/>
+        <data name="average1paths" format="tabular" label="${tool.name} on ${on_string}: Average number of Edges" from_work_dir="average1paths.tsv"/>
+        <data name="adjacencyMatSquared" format="tabular" label="${tool.name} on ${on_string}: Matrix of 2-paths" from_work_dir="adjacencyMatSquared.tsv"/>
+        <data name="average2paths" format="tabular" label="${tool.name} on ${on_string}: Average number of 2-paths" from_work_dir="average2paths.tsv"/>
+    </outputs>
+    <tests> <!-- two scenarios: one input file, and two input files (difference mode) -->
+        <test expect_num_outputs="4"> <!-- single-input run: each of the four outputs is checked for two expected text fragments -->
+            <param name="queryfile" value="result.proteinortho-graph"/>
+            <output name="adjacencyMat">
+                <assert_contents>
+                    <has_text text="18"/>
+                    <has_text text="14"/>
+                </assert_contents>
+            </output>
+            <output name="average1paths">
+                <assert_contents>
+                    <has_text text="9.6"/>
+                    <has_text text="15"/>
+                </assert_contents>
+            </output>
+            <output name="adjacencyMatSquared">
+                <assert_contents>
+                    <has_text text="750"/>
+                    <has_text text="74"/>
+                </assert_contents>
+            </output>
+            <output name="average2paths">
+                <assert_contents>
+                    <has_text text="1088.8"/>
+                    <has_text text="1374.2"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4"> <!-- two-input run: four outputs are still expected, but only average2paths has content assertions -->
+            <param name="queryfile" value="result.proteinortho-graph"/>
+            <param name="queryfile2" value="result.blast-graph"/>
+            <output name="average2paths">
+                <assert_contents>
+                    <has_text text="49.6"/>
+                    <has_text text="59.8"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[proteinortho summary
+
+**What it does**
+
+proteinortho_summary : Summarizes the (orthology-pairs/RBH) file(s) to determine how well the species are connected to each other.
+
+ * **Adjacency Matrix** : How well are the species connected to each other directly.
+
+ * **Average number of Edges** : Averaged number of connections for each species.
+
+ * **Matrix of 2-paths** : The square of the adjacency matrix = The number of paths of length 2 between two species.
+
+ * **Average number of 2-paths** : The average number of 2-paths for each species. If a species is not well connected to all the other species, it will result in a low average.
+
+
+If you supply a second orthology-pairs/RBH file, then the difference is calculated for all 4 outputs.
+
+E.g. given the RBH and the orthology-pairs of the same run : The outputs show how much the clustering removed from the initial reciprocal best hit graph.
+Or given 2 orthology-pairs files from the same set of fasta files with different parameters (evalue,...) : The outputs show how the parameters change the connectivity of the output.
+
+**Other Proteinortho-Tools for downstream analysis**
+
+* `proteinortho grab proteins` : find proteins/genes in a given fasta file and retrieve their sequence(s). You can also use an orthology-groups file.
+
+More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>