Mercurial > repos > iuc > proteinortho_summary
diff proteinortho_summary.xml @ 0:2e0e9c418a85 draft
"planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 889335c0a31f156c3f90d4c2048cb4df155a53b2"
author | iuc |
---|---|
date | Tue, 18 Feb 2020 17:56:58 -0500 |
parents | |
children | c3f58c2eee1e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteinortho_summary.xml Tue Feb 18 17:56:58 2020 -0500 @@ -0,0 +1,98 @@ +<tool id="proteinortho_summary" name="Proteinortho summary" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@"> + <description>summaries the orthology-pairs/RBH files</description> + <macros> + <import>proteinortho_macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ + proteinortho_summary.pl + $queryfile + #if $queryfile2: + '$queryfile2' + #end if + 2>&1 | awk '/^$/ && !f{f=1;next}1' | awk -v RS= '{print > ("output" NR ".tsv")}' + && + mv output1.tsv adjacencyMat.tsv && + mv output2.tsv average1paths.tsv && + mv output3.tsv adjacencyMatSquared.tsv && + mv output4.tsv average2paths.tsv + ]]></command> + <inputs> + <param name="queryfile" type="data" format="tabular" label="A orthology-pairs / RBH file"/> + <param name="queryfile2" type="data" format="tabular" optional="true" label="(optional) A second orthology-pairs / RBH file" help="If you provide a second file, then difference is calculated (GRAPH - second GRAPH)"/> + </inputs> + <outputs> + <data name="adjacencyMat" format="tabular" label="${tool.name} on ${on_string}: Adjacency Matrix" from_work_dir="adjacencyMat.tsv"/> + <data name="average1paths" format="tabular" label="${tool.name} on ${on_string}: Average number of Edges" from_work_dir="average1paths.tsv"/> + <data name="adjacencyMatSquared" format="tabular" label="${tool.name} on ${on_string}: Matrix of 2-paths" from_work_dir="adjacencyMatSquared.tsv"/> + <data name="average2paths" format="tabular" label="${tool.name} on ${on_string}: Average number of 2-paths" from_work_dir="average2paths.tsv"/> + </outputs> + <tests> + <test expect_num_outputs="4"> + <param name="queryfile" value="result.proteinortho-graph"/> + <output name="adjacencyMat"> + <assert_contents> + <has_text text="18"/> + <has_text text="14"/> + </assert_contents> + </output> + <output name="average1paths"> + <assert_contents> + <has_text text="9.6"/> + <has_text text="15"/> + </assert_contents> + </output> + <output name="adjacencyMatSquared"> + <assert_contents> + <has_text text="750"/> + <has_text text="74"/> + </assert_contents> + </output> + <output name="average2paths"> + <assert_contents> + <has_text text="1088.8"/> + <has_text text="1374.2"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <param name="queryfile" value="result.proteinortho-graph"/> + <param name="queryfile2" value="result.blast-graph"/> + <output name="average2paths"> + <assert_contents> + <has_text text="49.6"/> + <has_text text="59.8"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[proteinortho summary + +**What it does** + +proteinortho_summary : Summaries the (orthology-pairs/RBH) file(s) to determine how well the species are connected to each other. + + * **Adjacency Matrix** : How well are the species connected to each other directly. + + * **Average number of Edges** : Averaged number of connections for each species. + + * **Matrix of 2-paths** : The square of the adjacency matrix = The number of paths of length 2 between two species. + + * **Average number of 2-paths** : The average number of 2-paths for each species. If a species is not well connected to all the other species, it will result in a low average. + + +If you supply a second orthology-pairs/RBH then the difference is calculated for all 4 outputs. + +E.g. given the RBH and the orthology-pairs of the same run : The outputs show how much the clustering removed from the initial reciprocal best hit graph. +Or given 2 orthology-pairs from the same set of fasta files with different parameters (evalue,...) : The output show how the parameters change the connectivity of the output. + +**Other Proteinortho-Tools for downstream analysis** + +* `proteinortho grab proteins` : find proteins/genes in a given fasta file and retrieve their sequence(s). You can also use a orthology-groups file. + +More information can be found on github https://gitlab.com/paulklemm_PHD/proteinortho +]]> + </help> + <expand macro="citations"/> +</tool>