Galaxy |

Changeset 35:fa736576c7ed (2016-07-04)

Previous changeset 34:f2cbf1230026 (2016-06-29) Next changeset 36:2201c5d61f16 (2016-07-04)

Commit message:
planemo upload commit 16d0bc526ad02361a7c13231d4c50479c42d8d0f-dirty

modified:
fasta2rdf.xml

added:
annotation.xml
aragorn.xml
circos.xml
crt.xml
enzdp.xml
gbk2rdf.xml
genecaller.xml
genomeInformation.xml
interproscan.xml
ipath.xml
loader.xml
locustagger.xml
matrix.xml
merger.xml
pathwayAnalysis.xml
phylogeny.xml
priam.xml
rdf2embl.xml
rnammer.xml
signalp.xml
swisscog.xml
tmhmm.xml

removed:
sappDocker/fasta2rdf.xml

diff -r f2cbf1230026 -r fa736576c7ed annotation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/annotation.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,13 @@
+<tool id="DAnnotation" name="Protein annotation" version="0.1">
+ <description>SAPP - Protein annotation module</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:PROTEINANNOTATION</container>
+ </requirements>
+ <command interpreter="docker">java -jar /proteinannotation/proteinAnnotation-0.0.1-SNAPSHOT-jar-with-dependencies.jar '-input' '$input' '-output' '$output' -format TURTLE</command>
+ <inputs>
+ <param format="ttl" label="genome ttl file" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="Annotation: ${input.name}" name="output"/>
+ </outputs>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed aragorn.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/aragorn.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,80 @@
+<tool id="DAragorn" name="tRNA and tmRNA prediction" version="0.3">
+ <description>SAPP - Aragorn tRNA and tmRNA prediction</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:ARAGORN</container>
+ </requirements>
+ <command interpreter="docker">java -jar /aragorn/aragorn-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-input' '$input' '-output' '$output' '-gc' $genbank_gencode
+ '$tmRNA' '$tRNA' '$topology' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="RDF Genome" name="input" type="data"/>
+ <param label="Genetic code" name="genbank_gencode" type="select">
+ <option selected="true" value="1">1. Standard</option>
+ <option value="2">2. Vertebrate Mitochondrial</option>
+ <option value="3">3. Yeast Mitochondrial</option>
+ <option value="4">4. Mold, Protozoan, and Coelenterate
+ Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+ <option value="5">5. Invertebrate Mitochondrial</option>
+ <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear
+ Code</option>
+ <option value="9">9. Echinoderm Mitochondrial</option>
+ <option value="10">10. Euplotid Nuclear</option>
+ <option value="11">11. Bacteria and Archaea</option>
+ <option value="12">12. Alternative Yeast Nuclear</option>
+ <option value="13">13. Ascidian Mitochondrial</option>
+ <option value="14">14. Flatworm Mitochondrial</option>
+ <option value="15">15. Blepharisma Macronuclear</option>
+ <option value="16">16. Chlorophycean Mitochondrial</option>
+ <option value="21">21. Trematode Mitochondrial</option>
+ <option value="22">22. Scenedesmus obliquus mitochondrial</option>
+ <option value="23">23. Thraustochytrium Mitochondrial</option>
+ <option value="24">24. Pterobranchia mitochondrial</option>
+ </param>
+ <param label="Topology" name="topology" type="select">
+ <option value="-c">Assume that each sequence has a circular
+ topology</option>
+ <option value="-l">Assume that each sequence has a linear topology
+ </option>
+ </param>
+ <param checked="true" falsevalue="" help="" label="Search for tmRNA genes (-m)" name="tmRNA" truevalue="-m" type="boolean"/>
+ <param checked="true" falsevalue="" help="" label="Search for tRNA genes (-t)" name="tRNA" truevalue="-t" type="boolean"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="Aragorn: ${input.name}" name="output"/>
+ </outputs>
+ <citations>
+ <citation type="bibtex">@article{Laslett2004,
+ abstract = {A computer program, ARAGORN, identifies tRNA and tmRNA genes. The
+ program employs heuristic algorithms to predict tRNA secondary
+ structure, based on homology with recognized tRNA consensus sequences
+ and ability to form a base-paired cloverleaf. tmRNA genes are
+ identified using a modified version of the BRUCE program. ARAGORN
+ achieves a detection sensitivity of 99\% from a set of 1290
+ eubacterial, eukaryotic and archaeal tRNA genes and detects all
+ complete tmRNA sequences in the tmRNA database, improving on the
+ performance of the BRUCE program. Recently discovered tmRNA genes in
+ the chloroplasts of two species from the 'green' algae lineage are
+ detected. The output of the program reports the proposed tRNA
+ secondary structure and, for tmRNA genes, the secondary structure of
+ the tRNA domain, the tmRNA gene sequence, the tag peptide and a list
+ of organisms with matching tmRNA peptide tags.},
+ author = {Laslett, Dean and Canback, Bjorn},
+ doi = {10.1093/nar/gkh152},
+ file = {:Users/koeho006/Library/Application Support/Mendeley
+ Desktop/Downloaded/Laslett, Canback - 2004 - ARAGORN, a program to
+ detect tRNA genes and tmRNA genes in nucleotide sequences.pdf:pdf},
+ isbn = {1362-4962 (Electronic)$\backslash$n1362-4962 (Linking)},
+ issn = {03051048},
+ journal = {Nucleic Acids Research},
+ mendeley-groups = {VAPP Application note},
+ pages = {11--16},
+ pmid = {14704338},
+ title = {{ARAGORN, a program to detect tRNA genes and tmRNA genes in
+ nucleotide sequences}},
+ volume = {32},
+ year = {2004}
+ }
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed circos.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/circos.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,36 @@
+<tool id="DCircos" name="Circle Image Generator" version="0.1">
+ <description>Circos View</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:CIRCOS</container>
+ </requirements>
+ <command interpreter="docker">python3.4 /circos/circos.py '-input' '$input' -output '$output1' '$output2'</command>
+ <inputs>
+ <param format="ttl" label="genome ttl file" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="png" label="CIRCLE: ${input.name}" name="output1"/>
+ <data format="svg" label="CIRCLE: ${input.name}" name="output2"/>
+ </outputs>
+ <help>Visualization of the RDF genome using CIRCOS. It requires an RDF genome, optionally with gene predictions.
+ </help>
+ <citations>
+ <citation type="bibtex">@article{Krzywinski2009,
+abstract = {We created a visualization tool called Circos to facilitate the identification and analysis of similarities and differences arising from comparisons of genomes. Our tool is effective in displaying variation in genome structure and, generally, any other kind of positional relationships between genomic intervals. Such data are routinely produced by sequence alignments, hybridization arrays, genome mapping, and genotyping studies. Circos uses a circular ideogram layout to facilitate the display of relationships between pairs of positions by the use of ribbons, which encode the position, size, and orientation of related genomic elements. Circos is capable of displaying data as scatter, line, and histogram plots, heat maps, tiles, connectors, and text. Bitmap or vector images can be created from GFF-style data inputs and hierarchical configuration files, which can be easily generated by automated tools, making Circos suitable for rapid deployment in data analysis and reporting pipelines.},
+author = {Krzywinski, Martin and Schein, Jacqueline and Birol, Inan\c{c} and Connors, Joseph and Gascoyne, Randy and Horsman, Doug and Jones, Steven J and Marra, Marco A},
+doi = {10.1101/gr.092759.109},
+issn = {1549-5469},
+journal = {Genome research},
+keywords = {Animals,Chromosome Mapping,Chromosomes, Artificial, Bacterial,Chromosomes, Human, Pair 17,Chromosomes, Human, Pair 17: genetics,Chromosomes, Human, Pair 6,Chromosomes, Human, Pair 6: genetics,Contig Mapping,Dogs,Gene Dosage,Gene Dosage: genetics,Genome,Genome: genetics,Genomics,Humans,Lymphoma, Follicular,Lymphoma, Follicular: genetics,Software},
+month = sep,
+number = {9},
+pages = {1639--45},
+pmid = {19541911},
+title = {{Circos: an information aesthetic for comparative genomics.}},
+url = {http://genome.cshlp.org/content/early/2009/06/15/gr.092759.109.abstract},
+volume = {19},
+year = {2009}
+}
+
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed crt.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/crt.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,90 @@
+<tool id="DCRT" name="CRISPR detection" version="0.1">
+ <description></description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:CRT</container>
+ </requirements>
+ <command interpreter="docker">java -jar /crt/CRT-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-input' '$input' -output '$output' -format TURTLE
+ </command>
+ <inputs>
+ <param name="input" type="data" format="ttl" label="genome ttl file" />
+ </inputs>
+
+ <outputs>
+ <data format="ttl" name="output" label="CRISPR: ${input.name}" />
+ </outputs>
+ <help>
+ CRISPR prediction using CRT. Requires a converted
+ FASTA/EMBL/GenBank file.
+ </help>
+ <citations>
+ <citation type="bibtex">
+ @article{Bland2007,
+ abstract = {BACKGROUND:
+ Clustered Regularly Interspaced Palindromic Repeats
+ (CRISPRs) are a
+ novel type of direct repeat found in a wide range of
+ bacteria and
+ archaea. CRISPRs are beginning to attract attention
+ because of their
+ proposed mechanism; that is, defending their hosts
+ against invading
+ extrachromosomal elements such as viruses. Existing
+ repeat detection
+ tools do a poor job of identifying CRISPRs due to
+ the presence of
+ unique spacer sequences separating the repeats. In
+ this study, a new
+ tool, CRT, is introduced that rapidly and
+ accurately identifies
+ CRISPRs in large DNA strings, such as genomes
+ and metagenomes.
+ RESULTS: CRT was compared to CRISPR detection tools,
+ Patscan and
+ Pilercr. In terms of correctness, CRT was shown to be
+ very reliable,
+ demonstrating significant improvements over Patscan
+ for measures
+ precision, recall and quality. When compared to Pilercr,
+ CRT showed
+ improved performance for recall and quality. In terms of
+ speed, CRT
+ proved to be a huge improvement over Patscan. Both CRT and
+ Pilercr
+ were comparable in speed, however CRT was faster for genomes
+ containing large numbers of repeats. CONCLUSION: In this paper a new
+ tool was introduced for the automatic detection of CRISPR elements.
+ This tool, CRT, showed some important improvements over current
+ techniques for CRISPR identification. CRT's approach to detecting
+ repetitive sequences is straightforward. It uses a simple sequential
+ scan of a DNA sequence and detects repeats directly without any major
+ conversion or preprocessing of the input. This leads to a program
+ that is easy to describe and understand; yet it is very accurate,
+ fast and memory efficient, being O(n) in space and O(nm/l) in time.},
+ author = {Bland, Charles and Ramsey, Teresa L and Sabree, Fareedah
+ and Lowe, Micheal and Brown, Kyndall and Kyrpides, Nikos C and
+ Hugenholtz, Philip},
+ doi = {10.1186/1471-2105-8-209},
+ file =
+ {:Users/koeho006/Library/Application Support/Mendeley
+ Desktop/Downloaded/Bland et al. - 2007 - CRISPR recognition tool
+ (CRT) a tool for automatic detection of clustered regularly
+ interspaced palindromic repeat.pdf:pdf},
+ isbn = {1471-2105
+ (Electronic)$\backslash$n1471-2105 (Linking)},
+ issn = {14712105},
+ journal = {BMC bioinformatics},
+ mendeley-groups = {VAPP Application
+ note},
+ pages = {209},
+ pmid = {17577412},
+ title = {{CRISPR recognition
+ tool (CRT): a tool for automatic detection of
+ clustered regularly
+ interspaced palindromic repeats.}},
+ volume = {8},
+ year = {2007}
+ }
+ </citation>
+ </citations>
+</tool>

diff -r f2cbf1230026 -r fa736576c7ed enzdp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/enzdp.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,16 @@
+<tool id="DEnzDP" name="EnzDP - Enzyme prediction" version="1.0.0">
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:ENZDP</container>
+ </requirements>
+ <command interpreter="docker">java -jar /enzdp/enzdpRDF-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input '$input' -output '$output' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="genome ttl with protein sequences" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="ENZDP: ${input.name}" name="output"/>
+ </outputs>
+ <help/>
+ <citations/>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed fasta2rdf.xml
--- a/fasta2rdf.xml Wed Jun 29 02:21:47 2016 -0400
+++ b/fasta2rdf.xml Mon Jul 04 10:37:59 2016 -0400

@@ -1,10 +1,9 @@
<tool id="DFASTA2RDF" name="FASTA to RDF" version="0.1">
- <description>SAPP - FASTA 2 RDF conversion</description>
+ <description></description>
<requirements>
<container type="docker">jjkoehorst/sappdocker:FASTA2RDF</container>
</requirements>
-
- <command>java -jar /fasta2rdf/target/FASTA2RDF-0.1-jar-with-dependencies.jar
+ <command interpreter="docker">java -jar /fasta2rdf/FASTA2RDF-0.1-jar-with-dependencies.jar
                 '--type' '$source.fastaType' '--ignorestop' '$IgnoreStopCodon'
'--input' '$input' '--output' '$output' '-organism' '$organism'
'--ncbi_taxid' '$ncbi_taxid'
@@ -31,7 +30,8 @@
                                 <option value="genome"> Genome </option>
                                 <option value="gene"> Gene</option>
                                 <option value="protein"> Protein </option>
-                                <validator type="empty_field" message="Please select if it is a Genome, Gene or Protein" />
+                                <validator type="empty_field"
+                                        message="Please select if it is a Genome, Gene or Protein" />
                         </param>
                 </conditional>

diff -r f2cbf1230026 -r fa736576c7ed gbk2rdf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk2rdf.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,24 @@
+<tool id="DGBK2RDF" name="EMBL/GBK to RDF" version="0.1">
+ <description>Converts GenBank/EMBL files to RDF</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:GBK2RDF</container>
+ </requirements>
+ <command>java -jar /genbank2rdf/GenBank2RDF-0.0.1-SNAPSHOT-jar-with-dependencies.jar '-input' '$input' -output '$output' -source "$source" -format "$format" -identifier "${input.name}" -codon "$codon" </command>
+ <inputs>
+ <param format="gbk,gb,genbank,embl" label="Genbank file" name="input" type="data"/>
+ <param label="EMBL/GBK" name="format" type="select">
+ <option value="gbk">Genbank</option>
+ <option selected="true" value="embl">EMBL</option>
+ </param>
+ <param label="11/4" name="codon" type="select">
+ <option selected="true" value="11">11</option>
+ <option value="4">4</option>
+ </param>
+ <param label="Source of annotation eg. RAST/NCBI/EBI" name="source" optional="false" type="text"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="GBKttl: ${input.name}" name="output"/>
+ </outputs>
+ <help>Java Genbank or EMBL to RDF conversion
+ </help>
+</tool>

diff -r f2cbf1230026 -r fa736576c7ed genecaller.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genecaller.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,103 @@
+<tool id="DGenes" name="Gene prediction" version="1.0.0">
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:GENECALLER</container>
+ </requirements>
+ <command interpreter="docker">java -jar /genecaller/genecaller-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-runtype' '$runtype' -input '$input' -output '$output' -codon '$codon' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="ttl genome file" name="input" type="data"/>
+ <param label="codon table selection" name="codon" type="select">
+ <option value="11">The Bacterial, Archaeal and Plant Plastid Code
+ (transl_table=11)
+ </option>
+ <option value="4">The Mold, Protozoan, Coelenterate Mitochondrial
+ and Mycoplasma/Spiroplasma Code (transl_table=4)
+ </option>
+ </param>
+ <param label="single or meta genome" name="runtype" type="select">
+ <option value="single">Single genome analysis</option>
+ <option value="meta">Metagenome analysis</option>
+ </param>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="ORF: ${input.name}" name="output"/>
+ </outputs>
+ <help>Prodigal gene prediction requires an RDF file from either a
+ Genome FASTA or
+ Genbank/EMBL format.
+ </help>
+ <citations>
+ <citation type="bibtex">@article{Hyatt2010,
+ abstract = {BACKGROUND: The
+ quality of automated gene prediction in microbial
+ organisms has
+ improved steadily over the past decade, but there is
+ still room for
+ improvement. Increasing the number of correct
+ identifications, both of
+ genes and of the translation initiation
+ sites for each gene, and
+ reducing the overall number of false
+ positives, are all desirable
+ goals.
+
+ RESULTS: With our years of experience in manually curating
+ genomes for the
+ Joint Genome Institute, we developed a new gene
+ prediction algorithm
+ called Prodigal (PROkaryotic DYnamic programming
+ Gene-finding
+ ALgorithm). With Prodigal, we focused specifically on the
+ three goals
+ of improved gene structure prediction, improved
+ translation
+ initiation site recognition, and reduced false positives.
+ We compared
+ the results of Prodigal to existing gene-finding methods
+ to
+ demonstrate that it met each of these objectives.
+
+ CONCLUSION: We
+ built a fast, lightweight, open source gene prediction program
+ called
+ Prodigal http://compbio.ornl.gov/prodigal/. Prodigal achieved
+ good
+ results compared to existing methods, and we believe it will be
+ a
+ valuable asset to automated microbial annotation pipelines.},
+ author =
+ {Hyatt, Doug and Chen, Gwo-Liang and Locascio, Philip F and
+ Land,
+ Miriam L and Larimer, Frank W and Hauser, Loren J},
+ doi =
+ {10.1186/1471-2105-11-119},
+ file =
+ {:Users/koeho006/Library/Application Support/Mendeley
+ Desktop/Downloaded/Hyatt et al. - 2010 - Prodigal prokaryotic gene
+ recognition and translation initiation site identification.pdf:pdf},
+ issn = {1471-2105},
+ journal = {BMC bioinformatics},
+ keywords =
+ {Algorithms,Databases, Genetic,Genome, Bacterial,Peptide Chain
+ Initiation, Translational,Peptide Chain Initiation, Translational:
+ genetics,Prokaryotic Cells,Software},
+ mendeley-groups = {Dump/VAPP
+ Paper},
+ month = jan,
+ number = {1},
+ pages = {119},
+ pmid = {20211023},
+ title = {{Prodigal: prokaryotic gene recognition and translation
+ initiation site identification.}},
+ url =
+ {http://www.biomedcentral.com/1471-2105/11/119},
+ volume = {11},
+ year =
+ {2010}
+ }
+
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed genomeInformation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genomeInformation.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,16 @@
+<tool id="DInfo" name="Information overview" version="1.0.0">
+  <description>Information overview</description>
+  <requirements>
+    <container type="docker">jjkoehorst/sappdocker:GENOMEINFORMATION</container>
+  </requirements>
+  <command interpreter="docker">java -jar /genomeinformation/GenomeInformation-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input $input -output '$output' -format 'TURTLE'
+    </command>
+  <inputs>
+    <param format="ttl" label="Genome Database with Interpro" multiple="True" name="input" type="data"/>
+  </inputs>
+  <outputs>
+    <data format="txt" label="information.text" name="output"/>
+  </outputs>
+  <help>Genome(s) information overview
+  </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed interproscan.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/interproscan.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,148 @@
+<tool id="DInterproscan" name="Interproscan" version="1.0.0">
+ <description>Interproscan annotation for SAPP</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:INTERPROSCAN</container>
+ </requirements>
+ <command interpreter="docker">java -jar /interproscan/interproscanRDF-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-input' '$input' '-format' 'TURTLE'
+ '-applications' '$appl'
+ '-output'
+ '$outfile' -v '$version' '$disable'
+ </command>
+ <inputs>
+ <param format="ttl" label="genome rdf file with orf prediction" name="input" type="data"/>
+ <param display="checkboxes" help="Select your program." label="Applications to run" multiple="True" name="appl" type="select">
+ <option selected="true" value="TIGRFAM">TIGRFAM: protein families
+ based on Hidden Markov Models or HMMs
+ </option>
+ <option selected="false" value="PIRSF">PIRSF: non-overlapping
+ clustering of UniProtKB sequences into a hierarchical order
+ (evolutionary relationships)
+ </option>
+ <option selected="true" value="ProDom">ProDom: set of protein domain
+ families generated from the UniProtKB
+ </option>
+ <option selected="true" value="SMART">SMART: identification and
+ analysis of domain architectures based on Hidden Markov Models or
+ HMMs
+ </option>
+ <option selected="false" value="PrositeProfiles">PROSITE Profiles:
+ protein domains, families and functional sites as well as associated
+ profiles to identify them
+ </option>
+ <option selected="true" value="PrositePatterns">PROSITE Pattern:
+ protein domains, families and functional sites as well as associated
+ patterns to identify them
+ </option>
+ <option selected="false" value="HAMAP">HAMAP: High-quality Automated
+ Annotation of Microbial Proteomes
+ </option>
+ <option selected="true" value="PfamA">PfamA: protein families, each
+ represented by multiple sequence alignments and hidden Markov models
+ </option>
+ <option selected="true" value="PRINTS">PRINTS: group of conserved
+ motifs (fingerprints) used to characterise a protein family
+ </option>
+ <option selected="true" value="SuperFamily">SUPERFAMILY: database of
+ structural and functional annotation
+ </option>
+ <option selected="true" value="Coils">Coils: Prediction of Coiled
+ Coil Regions in Proteins
+ </option>
+ <option selected="true" value="Gene3d">Gene3d: Structural assignment
+ for whole genes and genomes using the CATH domain structure database
+ </option>
+ </param>
+ <param label="Version selection" name="version" type="select">
+ <option value="interproscan-5.17-56.0">interproscan-5.17-56.0</option>
+ </param>
+ <param checked="false" falsevalue="-disableprecalc" help="You need to setup your own lookup server as the EBI version can differ. Look at interproscan configuration file for more info" label="Perform lookup of InterPro at defined server address" name="disable" truevalue="" type="boolean"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="IPR: ${input.name}" name="outfile"/>
+ </outputs>
+ <help>Interproscan annotation suite. Select your RDF genome with
+ protein annotation.
+ This can be either from a converted GenBank/EMBL
+ file or from a
+ Prodigal prediction.
+ The output will be an RDF file with
+ protein domain annotation from
+ InterPro.
+ </help>
+ <citations>
+ <citation type="bibtex">@article{Mitchell26112014,
+ author = {Mitchell,
+ Alex and Chang, Hsin-Yu and Daugherty, Louise and
+ Fraser, Matthew and
+ Hunter, Sarah and Lopez, Rodrigo and McAnulla,
+ Craig and McMenamin,
+ Conor and Nuka, Gift and Pesseat, Sebastien and
+ Sangrador-Vegas, Amaia
+ and Scheremetjew, Maxim and Rato, Claudia and
+ Yong, Siew-Yit and
+ Bateman, Alex and Punta, Marco and Attwood, Teresa
+ K. and Sigrist,
+ Christian J.A. and Redaschi, Nicole and Rivoire,
+ Catherine and
+ Xenarios, Ioannis and Kahn, Daniel and Guyot, Dominique
+ and Bork, Peer
+ and Letunic, Ivica and Gough, Julian and Oates, Matt
+ and Haft, Daniel
+ and Huang, Hongzhan and Natale, Darren A. and Wu,
+ Cathy H. and Orengo,
+ Christine and Sillitoe, Ian and Mi, Huaiyu and
+ Thomas, Paul D. and
+ Finn, Robert D.},
+ title = {The InterPro protein families database: the
+ classification
+ resource after 15 years},
+ year = {2014},
+ doi =
+ {10.1093/nar/gku1243},
+ abstract ={The InterPro database
+ (http://www.ebi.ac.uk/interpro/) is a freely
+ available resource that
+ can be used to classify sequences into
+ protein families and to predict
+ the presence of important domains and
+ sites. Central to the InterPro
+ database are predictive models, known
+ as signatures, from a range of
+ different protein family databases
+ that have different biological
+ focuses and use different
+ methodological approaches to classify
+ protein families and domains.
+ InterPro integrates these signatures,
+ capitalizing on the respective
+ strengths of the individual databases,
+ to produce a powerful protein
+ classification resource. Here, we report
+ on the status of InterPro as
+ it enters its 15th year of operation, and
+ give an overview of new
+ developments with the database and its
+ associated Web interfaces and
+ software. In particular, the new domain
+ architecture search tool is
+ described and the process of mapping of
+ Gene Ontology terms to
+ InterPro is outlined. We also discuss the
+ challenges faced by the
+ resource given the explosive growth in
+ sequence data in recent years.
+ InterPro (version 48.0) contains 36 766
+ member database signatures
+ integrated into 26 238 InterPro entries, an
+ increase of over 3993
+ entries (5081 signatures), since 2012.},
+ URL =
+ {http://nar.oxfordjournals.org/content/early/2014/11/26/nar.gku1243.abstract},
+ eprint =
+ {http://nar.oxfordjournals.org/content/early/2014/11/26/nar.gku1243.full.pdf+html},
+ journal = {Nucleic Acids Research}
+ }
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed ipath.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ipath.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,39 @@
+<tool id="DIPath" name="iPath" version="2.01">
+ <description>iPath Generator</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:IPATH</container>
+ </requirements>
+ <command interpreter="docker">java -jar /ipath/iPath-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ #if $conditional.source_select=="single"
+ -group1 '$conditional.input'
+ #else
+ -group1 $conditional.input1
+ -group2 $conditional.input2
+ #end if
+ -o "$outfile" -format TURTLE
+ </command>
+ <inputs>
+ <param label="Priam E-Value cutoff" name="Evalue" optional="False" size="60" type="float" value="1E-1"/>
+ <conditional name="conditional">
+ <param label="Analysis method" name="source_select" type="select">
+ <option value="single">Single genome</option>
+ <option value="group">Group comparison</option>
+ </param>
+ <when value="single">
+ <param format="ttl" label="RDF Genome file" name="input" type="data"/>
+ </when>
+ <when value="group">
+ <param format="ttl" label="RDF Genome file for group 1" multiple="True" name="input1" type="data"/>
+ <param format="ttl" label="RDF Genome file for group 2" multiple="True" name="input2" type="data"/>
+ </when>
+ </conditional>
+ </inputs>
+ <outputs>
+ <data format="tsv" label="iPath comparison" name="outfile"/>
+ </outputs>
+ <help>GROUP 1: #FF0000
+GROUP 2: #0000FF
+BOTH: #00FF00
+
+</help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed loader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/loader.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,17 @@
+<tool id="DLoader" name="RDF remote Loader" version="0.1">
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:LOADER</container>
+ </requirements>
+ <command interpreter="docker">java -jar /loader/Loader-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ -input '$input' -endpoint "$endpoint" -format "application/x-turtle"</command>
+ <inputs>
+ <param format="ttl" label="Annotated RDF file" multiple="true" name="input" type="data"/>
+ <param label="SPARQL endpoint" name="endpoint" optional="false" type="text"/>
+ </inputs>
+ <outputs>
+ <data format="xml" label="Loader: $endpoint" name="output"/>
+ </outputs>
+ <help>RDF Loader into a remote SPARQL end point
+ </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed locustagger.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/locustagger.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,18 @@
+<tool id="DLocus" name="Locus tags inference from GBK import" version="0.1">
+ <description>Locus tagger inference from original annotation</description>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:LOCUSTAGGER</container>
+ </requirements>
+ <command interpreter="docker">java -jar /locustagger/LocusTagger-0.1-jar-with-dependencies.jar
+ '-input' '$input' -format 'TURTLE' -output '$output' -prefix '$prefix'</command>
+ <inputs>
+ <param format="ttl" label="RDF file" name="input" type="data"/>
+ <param format="text" label="PREFIX identifier" name="prefix" size="60" type="text"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="LocusTagger: ${input.name}" name="output"/>
+ </outputs>
+ <help>Locus tag inference from original genbank/embl that was converted
+ to RDF
+ </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed matrix.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/matrix.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,63 @@
+<tool id="DMatrix" name="MATSPARQL" version="1.0.1">
+  <description/>
+  <requirements>
+    <container type="docker">jjkoehorst/sappdocker:MATRIX</container>
+  </requirements>
+  <command interpreter="docker">java -jar /sparql/sparqljava-0.0.1-SNAPSHOT-jar-with-dependencies.jar '$separate' '-rdf' '$input' '-format' 'TURTLE' '-query' '$query' '-output' '$output' &amp;&amp; Rscript $__tool_directory__/matrix.R '$output' '$output' </command>
+  <inputs>
+    <param format="ttl" label="Genome Database" multiple="True" name="input" type="data"/>
+    <param area="True" label="SPARQL query" name="query" type="text" value="YOUR QUERY HERE"/>
+    <param checked="False" falsevalue="" help="Use this option if you run into memory or performance problems. Each genome will be queried independently of each other and therefore advanced comparison SPARQL queries will not work" label="Treat genomes separately" name="separate" truevalue="-separate" type="boolean"/>
+  </inputs>
+  <outputs>
+    <data format="tsv" label="matrix.tsv" name="output"/>
+  </outputs>
+  <help>The creation of a  matrix from a created SPARQL query. One should use a query that creates 3 columns for the X and Y coordinates and Z for the value.
+
+    A header for a SPARQL query would look like SELECT ?genome ?protein ?value or SELECT ?genome ?domain (COUNT(?domain) AS ?domainC)
+
+-----------------------------
+Genome Interpro Matrix
+-----------------------------
+The following query results in a matrix of genomes by Pfam accessions ::
+
+    PREFIX biopax:&lt;http://www.biopax.org/release/bp-level3.owl#&gt;
+    PREFIX ssb:&lt;http://csb.wur.nl/genome/&gt;
+    SELECT DISTINCT ?genome ?id (COUNT(?id) AS ?value)
+    WHERE {
+      ?genome a ssb:Genome .
+      ?genome ssb:dnaobject ?dna .
+      ?dna ssb:feature ?feature .
+      ?feature ssb:tool ?tool .
+      ?feature ssb:protein ?protein .
+      ?protein ssb:feature ?domain .
+      ?domain ssb:signature ?signature .
+      ?signature biopax:xref ?xref .
+      ?xref biopax:db 'pfam' .
+      ?xref biopax:id ?id .
+      } GROUP BY ?genome ?id
+
+-------------------
+Enzyme based matrix
+-------------------
+
+The following query results in a matrix of genomes by EC numbers ::
+
+   PREFIX ssb:&lt;http://csb.wur.nl/genome/&gt;
+   SELECT  ?genome ?ec (COUNT(?ec) AS ?ecCount)
+   WHERE {
+        ?gene a ssb:Cds .
+        ?gene ssb:locus_tag ?locus .
+        ?gene ssb:source ?source .
+        ?gene ssb:protein ?protein .
+        ?protein ssb:feature ?feature .
+        {
+            ?feature ssb:kegg ?ec .
+        } UNION {
+            ?feature ssb:ec_number ?ec .
+        }
+   } GROUP BY ?genome ?ec
+
+**If you require specific questions related to the database that you created feel free to contact us.**
+  </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed merger.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/merger.xml Mon Jul 04 10:37:59 2016 -0400

[

@@ -0,0 +1,17 @@
+<tool id="DMerger" name="Merger of RDF" version="1.0.0">
+  <!-- Galaxy wrapper around the SAPP rdfMerge jar: takes one or more Turtle (ttl) datasets and writes a single merged Turtle dataset. -->
+  <description/>
+  <requirements>
+    <container type="docker">jjkoehorst/sappdocker:MERGER</container>
+  </requirements>
+  <!-- Both dataset substitutions are single-quoted so the shell always sees each as one argument. -->
+  <command interpreter="docker">java -jar /merger/rdfMerge-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input '$input' -output '$output' -format TURTLE
+    </command>
+  <inputs>
+    <param format="ttl" label="RDF Files" multiple="True" name="input" type="data"/>
+  </inputs>
+  <outputs>
+    <!-- The label is a template; ${...} is the placeholder form used by the other SAPP tool labels. Writing {$...} would leave literal braces in the dataset name. The last selected input supplies the name. -->
+    <data format="ttl" label="MERGER: ${input[-1].name}" name="output"/>
+  </outputs>
+  <help>Merges multiple genome TURTLE runs into a single end...
+    Handy when running multiple parallel analysis and results can be merged into a single TURTLE RDF file...
+  </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed pathwayAnalysis.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pathwayAnalysis.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,12 @@
+<tool id="DPathway" name="Pathway analysis" version="0.1">
+ <!-- Galaxy wrapper around the SAPP pathwayAnalysis jar: reads one annotated Turtle (ttl) dataset and writes a tsv dataset. -->
+ <!-- NOTE(review): unlike the sibling SAPP tools, no requirements/container element is declared here although the jar path is absolute. Confirm which image is expected to provide /pathwayanalysis. -->
+ <description/>
+ <!-- Dataset substitutions are single-quoted, matching the quoting used by the annotation, PRIAM and rnammer wrappers. -->
+ <command interpreter="docker">java -jar /pathwayanalysis/pathwayAnalysis-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input '$input' -output '$output' -format TURTLE</command>
+ <inputs>
+ <param format="ttl" label="Annotated RDF file" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="tsv" label="PathwayAnalysis: ${input.name}" name="output"/>
+ </outputs>
+ <help>Pathway overview information for MetaCyc and KEGG
+ </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed phylogeny.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phylogeny.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,16 @@
+<tool id="DPhylogeny" name="Phylogeny analysis" version="0.1">
+ <!-- Galaxy wrapper around the SAPP phylogeny jar: takes one or more annotated Turtle (ttl) datasets and writes a newick dataset. -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:PHYLOGENY</container>
+ </requirements>
+ <!-- NOTE(review): this tool uses interpreter="java -jar" while the sibling SAPP tools use interpreter="docker" with "java -jar" inside the command text. Confirm the difference is intended. -->
+ <!-- Dataset substitutions are single-quoted so each reaches the jar as a single argument. -->
+ <command interpreter="java -jar">/phylogeny/phylogeny-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ -input '$input' -output '$output' -format TURTLE</command>
+ <inputs>
+ <param format="ttl" label="Annotated RDF file" multiple="true" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="newick" label="Phylogeny" name="output"/>
+ </outputs>
+ <help>Phylogeny analysis based on protein signatures
+ </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed priam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/priam.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,55 @@
+<tool id="DPriam" name="PRIAM EC detection" version="1.0.0">
+ <!-- Galaxy wrapper around the SAPP priam jar: reads one Turtle (ttl) genome dataset and writes a Turtle dataset labelled "PRIAM: <input name>". -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:PRIAM</container>
+ </requirements>
+ <!-- $input and $output are the dataset paths Galaxy substitutes for the params declared below; both are single-quoted for the shell. -->
+ <command interpreter="docker">java -jar /priam/priam-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-input' '$input' -format TURTLE -output '$output'
+ </command>
+ <inputs>
+ <param format="ttl" label="ttl genome file" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="PRIAM: ${input.name}" name="output"/>
+ </outputs>
+ <help>EC detection using PRIAM. An RDF file with protein prediction is
+ required. Either from Genbank/EMBL or from Prodigal gene prediction
+ module.
+ </help>
+ <!-- BibTeX entry for the PRIAM publication (Nucleic Acids Research, 2003). -->
+ <citations>
+ <citation type="bibtex">@article{Claudel-Renard2003,
+ abstract = {The
+ advent of fully sequenced genomes opens the ground for the
+ reconstruction of metabolic pathways on the basis of the
+ identification of enzyme-coding genes. Here we describe PRIAM, a
+ method for automated enzyme detection in a fully sequenced genome,
+ based on the classification of enzymes in the ENZYME database. PRIAM
+ relies on sets of position-specific scoring matrices ( profiles')
+ automatically tailored for each ENZYME entry. Automatically generated
+ logical rules define which of these profiles is required in order to
+ infer the presence of the corresponding enzyme in an organism. As an
+ example, PRIAM was applied to identify potential metabolic pathways
+ from the complete genome of the nitrogen-fixing bacterium
+ Sinorhizobium meliloti. The results of this automated method were
+ compared with the original genome annotation and visualised on KEGG
+ graphs in order to facilitate the interpretation of metabolic
+ pathways and to highlight potentially missing enzymes.},
+ author =
+ {Claudel-Renard, C.},
+ doi = {10.1093/nar/gkg847},
+ issn = {1362-4962},
+ journal = {Nucleic Acids Research},
+ month = nov,
+ number = {22},
+ pages =
+ {6633--6639},
+ title = {{Enzyme-specific profiles for genome
+ annotation: PRIAM}},
+ url =
+ {http://nar.oxfordjournals.org/content/31/22/6633.abstract?etoc},
+ volume = {31},
+ year = {2003}
+ }
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed rdf2embl.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rdf2embl.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,70 @@
+<tool id="DRDF2EMBL" name="EMBL/GenBank creation" version="0.1">
+ <!-- Galaxy wrapper around the SAPP rdf2embl jar: converts one Turtle (ttl) dataset into an EMBL or GenBank flat file, passing the record metadata collected in the form below as command-line options. -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:RDF2EMBL</container>
+ </requirements>
+ <!-- $pathwaytools and $gapprotein expand either to an empty string or to the flag itself (see their option values), so they appear without a preceding option name. -->
+ <command interpreter="docker">java -jar /rdf2embl/rdf2embl-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-input' '$input' -output '$output' -format 'TURTLE' '-organism'
+ '$organism' '-strain' '$strain' '-substrain' '$substrain' '-keywords'
+ '$keywords' '-taxon' '$taxon' -codon '$codon'
+ -locus '$prefix' '-title'
+ '$title' '-authors' '$authors' '-consortium' '$consortium' '-journal'
+ '$journal' '-dataclass' '$dataclass' '-writer' '$writer' '-projectid'
+ '$projectid' '$pathwaytools' '-note' '$note' '-scaffold' '$scaffold'
+ '$gapprotein'
+ </command>
+ <inputs>
+ <param format="ttl" label="TTL / RDF file" multiple="False" name="input" type="data"/>
+ <param label="Organism name" name="organism" optional="false" type="text"/>
+ <param label="Strain name" name="strain" optional="false" type="text"/>
+ <param label="Substrain name" name="substrain" optional="false" type="text"/>
+ <param label="Keywords" name="keywords" optional="false" type="text"/>
+ <param label="Project identifier" name="projectid" optional="false" type="text"/>
+ <param label="Taxon number" name="taxon" optional="false" type="text"/>
+ <param label="Locus prefix (FZH_)" name="prefix" optional="false" type="text"/>
+ <param label="Scaffold prefix (SCAF_)" name="scaffold" optional="false" type="text"/>
+ <param label="Journal" name="journal" optional="false" type="text" value="journal vol:pp-pp(year)"/>
+ <param label="Authors" name="authors" optional="false" type="text"/>
+ <param label="Title" name="title" optional="false" type="text"/>
+ <param label="Consortium" name="consortium" optional="false" type="text"/>
+ <!-- NOTE(review): the default note contains $shakey and $header, which are not params of this tool; presumably they are placeholders resolved by the jar. Confirm. -->
+ <param area="True" label="Note for each record" name="note" optional="false" size="10" type="text" value="Annotation was performed using the Semantic Annotation Platform for Prokaryotes (SAPP) and the sha384 key is $shakey and the FASTA header name is: $header"/>
+ <param label="codon table selection" name="codon" type="select">
+ <option value="11">The Bacterial, Archaeal and Plant Plastid Code
+ (transl_table=11)
+ </option>
+ <option value="4">The Mold, Protozoan, Coelenterate Mitochondrial
+ and Mycoplasma/Spiroplasma Code (transl_table=4)
+ </option>
+ </param>
+ <param label="Output format" name="writer" type="select">
+ <option selected="true" value="embl">EMBL format</option>
+ <option value="genbank">Genbank format</option>
+ </param>
+ <param label="Data class selection" name="dataclass" type="select">
+ <option value="PAT">Patent</option>
+ <option value="EST">Expressed Sequence Tag</option>
+ <option value="GSS">Genome Survey Sequence</option>
+ <option value="HTC">High Throughput CDNA sequencing</option>
+ <option value="HTG">High Throughput Genome sequencing</option>
+ <option value="MGA">Mass Genome Annotation</option>
+ <option selected="true" value="WGS">Whole Genome Shotgun</option>
+ <option value="TSA">Transcriptome Shotgun Assembly</option>
+ <option value="STS">Sequence Tagged Site</option>
+ <option value="STD">Standard (all entries not classified as above)
+ </option>
+ </param>
+ <param label="Remove gap spanning proteins (contains more than 50% of XXX in sequence)" name="gapprotein" type="select">
+ <option selected="true" value="">No</option>
+ <option selected="false" value="-gapprotein">Yes</option>
+ </param>
+ <param label="Pathway tools compatible? WARNING: Each contig needs to be loaded individually into Pathway tools (V19.0)" name="pathwaytools" type="select">
+ <option selected="true" value="">No</option>
+ <option selected="false" value="-pathwaytools">Yes</option>
+ </param>
+ </inputs>
+ <outputs>
+ <!-- The datatype follows the selected writer: embl by default, switched to genbank when the GenBank writer is chosen. -->
+ <data format="embl" label="EMBL: ${input.name}" name="output">
+ <change_format>
+ <when format="genbank" input="writer" value="genbank"/>
+ </change_format>
+ </data>
+ </outputs>
+ <help>RDF to EMBL conversion. Locus tags are automatically generated unless locus tags have been inferred or generated through the locus module.
+ </help>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed rnammer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rnammer.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,87 @@
+<tool id="DRnammer" name="rRNA detection" version="1.0.0">
+ <!-- Galaxy wrapper around the SAPP rnammer jar: reads one Turtle (ttl) genome dataset and writes a Turtle dataset labelled "RNA: <input name>". -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:RNAMMER</container>
+ </requirements>
+ <command interpreter="docker">java -jar /rnammer/rnammer-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-input' '$input' -output '$output' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="genome ttl file" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="RNA: ${input.name}" name="output"/>
+ </outputs>
+ <help>Be aware that this can only be used for academic users; other
+ users are
+ requested to contact CBS Software Package Manager at
+ software@cbs.dtu.dk.
+ We are investigating alternative prediction
+ applications, please contact
+ us if you are aware of such method.
+ </help>
+ <!-- BibTeX entry for the RNAmmer publication. Only bibliographic fields are kept; reference-manager export fields (local file path, group names) do not belong in a published tool. -->
+ <citations>
+ <citation type="bibtex">@article{Lagesen2007,
+ abstract = {The
+ publication of a complete genome sequence is usually
+ accompanied by
+ annotations of its genes. In contrast to protein
+ coding genes, genes
+ for ribosomal RNA (rRNA) are often poorly or
+ inconsistently annotated.
+ This makes comparative studies based on
+ rRNA genes difficult. We have
+ therefore created computational
+ predictors for the major rRNA species
+ from all kingdoms of life and
+ compiled them into a program called
+ RNAmmer. The program uses hidden
+ Markov models trained on data from
+ the 5S ribosomal RNA database and
+ the European ribosomal RNA database
+ project. A pre-screening step
+ makes the method fast with little loss
+ of sensitivity, enabling the
+ analysis of a complete bacterial genome
+ in less than a minute.
+ Results from running RNAmmer on a large set of
+ genomes indicate that
+ the location of rRNAs can be predicted with a
+ very high level of
+ accuracy. Novel, unannotated rRNAs are also
+ predicted in many
+ genomes. The software as well as the genome analysis
+ results are
+ available at the CBS web server.},
+ author = {Lagesen, Karin
+ and Hallin, Peter and R\o dland, Einar Andreas and
+ Staerfeldt,
+ Hans-Henrik and Rognes, Torbj\o rn and Ussery, David W},
+ doi =
+ {10.1093/nar/gkm160},
+ issn =
+ {1362-4962},
+ journal = {Nucleic acids research},
+ keywords =
+ {Computational Biology,Computational Biology: methods,Genes,
+ rRNA,Genome, Bacterial,Genomics,Genomics: methods,Markov
+ Chains,Software},
+ month = jan,
+ number = {9},
+ pages = {3100--8},
+ pmid = {17452365},
+ title = {{RNAmmer: consistent and rapid annotation of ribosomal RNA
+ genes.}},
+ volume = {35},
+ year = {2007}
+ }
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed sappDocker/fasta2rdf.xml
--- a/sappDocker/fasta2rdf.xml Wed Jun 29 02:21:47 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,82 +0,0 @@
-<tool id="DFASTA2RDF" name="FASTA to RDF" version="0.1">
- <description></description>
- <requirements>
- <container type="docker">jjkoehorst/sappdocker:FASTA2RDF</container>
- </requirements>
- <command interpreter="docker">java -jar /fasta2rdf/target/FASTA2RDF-0.1-jar-with-dependencies.jar
-                '--type' '$source.fastaType' '--ignorestop' '$IgnoreStopCodon'
- '--input' '$input' '--output' '$output' '-organism' '$organism'
- '--ncbi_taxid' '$ncbi_taxid'
- #if len(str($identification_tag))==0
- '--idtag' ${input.name}
- #else
- '--idtag' '$identification_tag'
- #end if
- --source SAPP
-
- #for $index, $id in enumerate( $ids )
- '--id_alternative' '$id.id_tag'
- #end for
- '--id_alternative' '$input.name'
- '--codon' '$table'
- </command>
- <inputs>
- <param size="60" name="input" type="data" format="fasta" label="Fasta file for conversion" />
-
-                <conditional name="source">
-                        <param name="fastaType" type="select"
-                                label="Select if it is a Genome/Gene/Protein or program wont start!">
-                                <option value="">To be chosen</option>
-                                <option value="genome"> Genome </option>
-                                <option value="gene"> Gene</option>
-                                <option value="protein"> Protein </option>
-                                <validator type="empty_field"
-                                        message="Please select if it is a Genome, Gene or Protein" />
-                        </param>
-                </conditional>
-
- <param name="table" type="select" label="Codon table">
- <option value="1"> 1 - UNIVERSAL </option>
- <option value="2"> 2 - VERTEBRATE_MITOCHONDRIAL </option>
- <option value="3"> 3 - YEAST_MITOCHONDRIAL </option>
- <option value="4"> 4 - MOLD_MITOCHONDRIAL </option>
- <option value="5"> 5 - INVERTEBRATE_MITOCHONDRIAL </option>
- <option value="6"> 6 - CILIATE_NUCLEAR </option>
- <option value="9"> 9 - ECHINODERM_MITOCHONDRIAL </option>
- <option value="10"> 10 - EUPLOTID_NUCLEAR </option>
- <option value="11" selected="true"> 11 - BACTERIAL </option>
- <option value="12"> 12 - ALTERNATIVE_YEAST_NUCLEAR </option>
- <option value="13"> 13 - ASCIDIAN_MITOCHONDRIAL </option>
- <option value="14"> 14 - FLATWORM_MITOCHONDRIAL </option>
- <option value="15"> 15 - BLEPHARISMA_MACRONUCLEAR </option>
- <option value="16"> 16 - 2CHLOROPHYCEAN_MITOCHONDRIAL </option>
- <option value="21"> 21 - TREMATODE_MITOCHONDRIAL </option>
- <option value="23"> 23 - SCENEDESMUS_MITOCHONDRIAL </option>
- </param>
- <param size="60" name="organism" type="text" format="text"
- label="organism name" />
- <param name='IgnoreStopCodon' type='boolean'
- label='Ignore if stop codon within protein sequence' truevalue='true'
- falsevalue='false' checked="false" help='' />
-
- <param size="60" name="ncbi_taxid" type="integer" value="0"
- label="NCBI taxonomy ID" optional="False">
- <validator type="in_range" min="1"
- message="Minimum taxonomy value is 1" />
- </param>
- <param size="60" name="identification_tag" type="text" format="text"
- label="An identification tag used for RDF storage !Needs to be very unique!"
- optional="True" />
- <repeat name="ids" title="Identification tags">
- <param size="60" name="id_tag" type="text" format="text"
- label="An identification tag used by other consortiums" />
- </repeat>
- </inputs>
- <outputs>
- <data format="ttl" name="output" label="FASTA2RDF: ${input.name}" />
- </outputs>
- <help>
- RDF creation from a multi (gene/protein/genome) fasta file
- </help>
-</tool>
-

diff -r f2cbf1230026 -r fa736576c7ed signalp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/signalp.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,59 @@
+<tool id="DSignalp" name="Signal peptide detection" version="1.0.0">
+ <!-- Galaxy wrapper around the SAPP signalp jar: reads one Turtle (ttl) genome dataset plus an organism-type selection and writes a Turtle dataset labelled "signalP: <input name>". -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:SIGNALP</container>
+ </requirements>
+ <!-- All substitutions are single-quoted, matching the quoting already applied to $runtype. -->
+ <command interpreter="docker">java -jar /signalp/signalp-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ '-signaltype' '$runtype' -input '$input' -output '$output' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="ttl genome file" name="input" type="data"/>
+ <param label="Gram+/- or Eukaryotes" name="runtype" type="select">
+ <option value="gram+">Gram+ Bacteria</option>
+ <option value="gram-">Gram- Bacteria</option>
+ <option value="euk">Eukaryotes</option>
+ </param>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="signalP: ${input.name}" name="output"/>
+ </outputs>
+ <help>Be aware that this can only be used for academic users; other
+ users are
+ requested to contact CBS Software Package Manager at
+ software@cbs.dtu.dk.
+ We are investigating alternative prediction
+ applications, please contact
+ us if you are aware of such method.
+ </help>
+ <!-- BibTeX entry for the SignalP 4.0 publication. Only bibliographic fields are kept; reference-manager group names are not part of the citation. -->
+ <citations>
+ <citation type="bibtex">@article{Petersen2011,
+ author = {Petersen,
+ Thomas Nordahl and Brunak, S\o ren and von Heijne,
+ Gunnar and Nielsen,
+ Henrik},
+ doi = {10.1038/nmeth.1701},
+ issn = {1548-7105},
+ journal =
+ {Nature methods},
+ keywords = {Algorithms,Cell Membrane,Cell Membrane:
+ metabolism,Computational
+ Biology,Protein Sorting Signals,Software},
+ month = jan,
+ number = {10},
+ pages =
+ {785--6},
+ pmid = {21959131},
+ publisher = {Nature Publishing Group},
+ title = {{SignalP 4.0: discriminating signal peptides from
+ transmembrane
+ regions.}},
+ url =
+ {http://www.ncbi.nlm.nih.gov/pubmed/21959131},
+ volume = {8},
+ year =
+ {2011}
+ }
+ </citation>
+ </citations>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed swisscog.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/swisscog.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,17 @@
+<tool id="DSwissCog" name="Swissprot COG annotation" version="1.0.0">
+ <!-- Galaxy wrapper around the SAPP SwissCog jar: reads one Turtle (ttl) genome dataset and writes a Turtle dataset labelled "SWISSCOG: <input name>". -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:SWISSCOG</container>
+ </requirements>
+ <!-- Dataset substitutions are single-quoted so each reaches the jar as a single argument. -->
+ <command interpreter="docker">java -jar /swisscog/SwissCog-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ -input '$input' -output '$output' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="genome ttl with orf prediction" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="SWISSCOG: ${input.name}" name="output"/>
+ </outputs>
+ <!-- Help and citations are intentionally left as empty elements, as in the original definition. -->
+ <help/>
+ <citations/>
+</tool>
\ No newline at end of file

diff -r f2cbf1230026 -r fa736576c7ed tmhmm.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tmhmm.xml Mon Jul 04 10:37:59 2016 -0400

@@ -0,0 +1,92 @@
+<tool id="DTmhmm" name="Transmembrane detection" version="1.0.0">
+ <!-- Galaxy wrapper around the SAPP tmhmm jar: reads one Turtle (ttl) genome dataset and writes a Turtle dataset labelled "TMHMM: <input name>". -->
+ <description/>
+ <requirements>
+ <container type="docker">jjkoehorst/sappdocker:TMHMM</container>
+ </requirements>
+ <!-- NOTE(review): unlike the sibling SAPP tools, this command element carries no interpreter attribute. Confirm that is intended. -->
+ <!-- Dataset substitutions are single-quoted so each reaches the jar as a single argument. -->
+ <command>java -jar /tmhmm/tmhmm-0.0.1-SNAPSHOT-jar-with-dependencies.jar
+ -input '$input' -output '$output' -format TURTLE
+ </command>
+ <inputs>
+ <param format="ttl" label="genome ttl with orf prediction" name="input" type="data"/>
+ </inputs>
+ <outputs>
+ <data format="ttl" label="TMHMM: ${input.name}" name="output"/>
+ </outputs>
+ <help>Be aware that this can only be used for academic users; other
+ users are
+ requested to contact CBS Software Package Manager at
+ software@cbs.dtu.dk.
+ We are investigating alternative prediction
+ applications, please contact
+ us if you are aware of such method.
+ </help>
+ <!-- BibTeX entry for the TMHMM publication (Journal of molecular biology, 2001). -->
+ <citations>
+ <citation type="bibtex">@article{Krogh2001,
+ abstract = {We describe and
+ validate a new membrane protein topology
+ prediction method, TMHMM,
+ based on a hidden Markov model. We present
+ a detailed analysis of
+ TMHMM's performance, and show that it
+ correctly predicts 97-98 \% of
+ the transmembrane helices.
+ Additionally, TMHMM can discriminate
+ between soluble and membrane
+ proteins with both specificity and
+ sensitivity better than 99 \%,
+ although the accuracy drops when signal
+ peptides are present. This
+ high degree of accuracy allowed us to
+ predict reliably integral
+ membrane proteins in a large collection of
+ genomes. Based on these
+ predictions, we estimate that 20-30 \% of all
+ genes in most genomes
+ encode membrane proteins, which is in agreement
+ with previous
+ estimates. We further discovered that proteins with
+ N(in)-C(in)
+ topologies are strongly preferred in all examined
+ organisms, except
+ Caenorhabditis elegans, where the large number of
+ 7TM receptors
+ increases the counts for N(out)-C(in) topologies. We
+ discuss the
+ possible relevance of this finding for our understanding
+ of membrane
+ protein assembly mechanisms. A TMHMM prediction service is
+ available
+ at http://www.cbs.dtu.dk/services/TMHMM/.},
+ author = {Krogh,
+ A and Larsson, B and von Heijne, G and Sonnhammer, E L},
+ doi =
+ {10.1006/jmbi.2000.4315},
+ issn = {0022-2836},
+ journal = {Journal of
+ molecular biology},
+ keywords = {Animals,Bacterial Proteins,Bacterial
+ Proteins:
+ chemistry,Computational Biology,Computational Biology:
+ methods,Databases as Topic,Fungal Proteins,Fungal Proteins:
+ chemistry,Genome,Internet,Markov Chains,Membrane Proteins,Membrane
+ Proteins: chemistry,Plant Proteins,Plant Proteins:
+ chemistry,Porins,Porins: chemistry,Protein Sorting Signals,Protein
+ Structure, Secondary,Reproducibility of Results,Research
+ Design,Sensitivity and Specificity,Software,Solubility},
+ month = jan,
+ number = {3},
+ pages = {567--80},
+ pmid = {11152613},
+ title = {{Predicting
+ transmembrane protein topology with a hidden Markov
+ model: application
+ to complete genomes.}},
+ url =
+ {http://www.sciencedirect.com/science/article/pii/S0022283600943158},
+ volume = {305},
+ year = {2001}
+ }
+
+ </citation>
+ </citations>
+</tool>