Mercurial > repos > peterjc > venn_list
diff tools/venn_list/venn_list.xml @ 4:991342eca214 draft
Uploaded v0.0.8a, declare Biopython dependency via Tool Shed
author | peterjc |
---|---|
date | Wed, 29 Apr 2015 11:00:41 -0400 |
parents | |
children | 26e35d5133a1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/venn_list/venn_list.xml Wed Apr 29 11:00:41 2015 -0400 @@ -0,0 +1,130 @@ +<tool id="venn_list" name="Venn Diagram" version="0.0.8"> + <description>from lists</description> + <requirements> + <requirement type="python-module">rpy</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="python-module">Bio</requirement> + </requirements> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <command interpreter="python"> +venn_list.py +#if $universe.type_select=="implicit": + - - +#else: + "$main" $main.ext +#end if +"$main_lab" +#for $s in $sets: + "$s.set" $s.set.ext "$s.lab" +#end for +$PDF + </command> + <inputs> + <param name="main_lab" size="30" type="text" value="Venn Diagram" label="Plot title"/> + <conditional name="universe"> + <param name="type_select" type="select" label="Implicit or explicit full ID list?"> + <option value="explicit">Explicit</option> + <option value="implicit">Implicit (use union of sets below)</option> + </param> + <when value="explicit"> + <param name="main" type="data" format="tabular,fasta,fastq,sff" label="Full dataset (with all identifiers)" help="Tabular file (uses column one), FASTA, FASTQ or SFF file."/> + </when> + <when value="implicit"/> + </conditional> + <repeat name="sets" min="1" max="3" title="Sets"> + <param name="set" type="data" format="tabular,fasta,fastq,sff" label="Members of set" help="Tabular file (uses column one), FASTA, FASTQ or SFF file."/> + <param name="lab" size="30" type="text" value="Group" label="Caption for set"/> + </repeat> + </inputs> + <outputs> + <data format="pdf" name="PDF" /> + </outputs> + <tests> + <!-- Doesn't seem to work properly, manages to get two sets, both + with same FASTA file, but second with default "Group" label. --> + <test> + <param name="type_select" value="explicit"/> + <param name="main" value="venn_list.tabular" ftype="tabular"/> + <param name="main_lab" value="Some Proteins"/> + <param name="set" value="rhodopsin_proteins.fasta"/> + <param name="lab" value="Rhodopsins"/> + <output name="PDF" file="magic.pdf" ftype="pdf" compare="contains" /> + </test> + <!-- Can't use more than one repeat value in tests (yet) + <test> + <param name="type_select" value="explicit"/> + <param name="main" value="venn_list.tabular" ftype="tabular"/> + <param name="main_lab" value="Some Proteins"/> + <param name="count" value="3"/> + <param name="set" value="rhodopsin_proteins.fasta"/> + <param name="lab" value="Rhodopsins"/> + <param name="set" value="four_human_proteins.fasta"/> + <param name="lab" value="Human"/> + <param name="set" value="blastp_four_human_vs_rhodopsin.tabular"/> + <param name="lab" value="Human vs Rhodopsin BLAST"/> + <output name="PDF" file="magic.pdf" ftype="pdf" compare="contains" /> + </test> + --> + </tests> + <help> + +.. class:: infomark + +**TIP:** If your data is in tabular files, the identifier is assumed to be in column one. + +**What it does** + +Draws Venn Diagram for one, two or three sets (as a PDF file). + +You must supply one, two or three sets of identifiers -- corresponding +to one, two or three circles on the Venn Diagram. + +In general you should also give the full list of all the identifiers +explicitly. This is used to calculate the number of identifers outside +the circles (and check the identifiers in the other files match up). +The full list can be omitted by implicitly taking the union of the +category sets. In this case, the count outside the categories (circles) +will always be zero. + +The identifiers can be taken from the first column of a tabular file +(e.g. query names in BLAST tabular output, or signal peptide predictions +after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF). + +For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF +file), and the results of several different read mappings (e.g. to +different references) as tabular files (filtered to have just the mapped +reads). You could then show the different mappings (and their overlaps) +as a Venn Diagram, and the outside count would be the unmapped reads. + +**Citations** + +The Venn Diagrams are drawn using Gordon Smyth's limma package from +R/Bioconductor, http://www.bioconductor.org/ + +The R library is called from Python via rpy, http://rpy.sourceforge.net/ + +If you use this Galaxy tool in work leading to a scientific publication please +cite: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +This tool uses Biopython to read and write SFF files, so you may also wish to +cite the Biopython application note (and Galaxy too of course): + +Cock et al 2009. Biopython: freely available Python tools for computational +molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. +http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. + + </help> + <citations> + <citation type="doi">10.7717/peerj.167</citation> + <citation type="doi">10.1093/bioinformatics/15.5.356</citation> + </citations> +</tool>