# HG changeset patch # User jjkoehorst # Date 1467643079 14400 # Node ID fa736576c7edb902a8c65fb9c56652b09911d899 # Parent f2cbf1230026c5ad62575943d83fb2b0481d11f8 planemo upload commit 16d0bc526ad02361a7c13231d4c50479c42d8d0f-dirty diff -r f2cbf1230026 -r fa736576c7ed annotation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annotation.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,13 @@ + + SAPP - Protein annotation module + + jjkoehorst/sappdocker:PROTEINANNOTATION + + java -jar /proteinannotation/proteinAnnotation-0.0.1-SNAPSHOT-jar-with-dependencies.jar '-input' '$input' '-output' '$output' -format TURTLE + + + + + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed aragorn.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/aragorn.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,80 @@ + + SAPP - Aragorn tRNA and tmRNA prediction + + jjkoehorst/sappdocker:ARAGORN + + java -jar /aragorn/aragorn-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-input' '$input' '-output' '$output' '-gc' $genbank_gencode + '$tmRNA' '$tRNA' '$topology' -format TURTLE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @article{Laslett2004, + abstract = {A computer program, ARAGORN, identifies tRNA and tmRNA genes. The + program employs heuristic algorithms to predict tRNA secondary + structure, based on homology with recognized tRNA consensus sequences + and ability to form a base-paired cloverleaf. tmRNA genes are + identified using a modified version of the BRUCE program. ARAGORN + achieves a detection sensitivity of 99\% from a set of 1290 + eubacterial, eukaryotic and archaeal tRNA genes and detects all + complete tmRNA sequences in the tmRNA database, improving on the + performance of the BRUCE program. Recently discovered tmRNA genes in + the chloroplasts of two species from the 'green' algae lineage are + detected. 
The output of the program reports the proposed tRNA + secondary structure and, for tmRNA genes, the secondary structure of + the tRNA domain, the tmRNA gene sequence, the tag peptide and a list + of organisms with matching tmRNA peptide tags.}, + author = {Laslett, Dean and Canback, Bjorn}, + doi = {10.1093/nar/gkh152}, + file = {:Users/koeho006/Library/Application Support/Mendeley + Desktop/Downloaded/Laslett, Canback - 2004 - ARAGORN, a program to + detect tRNA genes and tmRNA genes in nucleotide sequences.pdf:pdf}, + isbn = {1362-4962 (Electronic)$\backslash$n1362-4962 (Linking)}, + issn = {03051048}, + journal = {Nucleic Acids Research}, + mendeley-groups = {VAPP Application note}, + pages = {11--16}, + pmid = {14704338}, + title = {{ARAGORN, a program to detect tRNA genes and tmRNA genes in + nucleotide sequences}}, + volume = {32}, + year = {2004} + } + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed circos.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/circos.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,36 @@ + + Circos View + + jjkoehorst/sappdocker:CIRCOS + + python3.4 /circos/circos.py '-input' '$input' -output '$output1' '$output2' + + + + + + + + Visualization of the RDF genome using CIRCOS. It requires an RDF genome, optionally with gene predictions. + + + @article{Krzywinski2009, +abstract = {We created a visualization tool called Circos to facilitate the identification and analysis of similarities and differences arising from comparisons of genomes. Our tool is effective in displaying variation in genome structure and, generally, any other kind of positional relationships between genomic intervals. Such data are routinely produced by sequence alignments, hybridization arrays, genome mapping, and genotyping studies. Circos uses a circular ideogram layout to facilitate the display of relationships between pairs of positions by the use of ribbons, which encode the position, size, and orientation of related genomic elements. 
Circos is capable of displaying data as scatter, line, and histogram plots, heat maps, tiles, connectors, and text. Bitmap or vector images can be created from GFF-style data inputs and hierarchical configuration files, which can be easily generated by automated tools, making Circos suitable for rapid deployment in data analysis and reporting pipelines.}, +author = {Krzywinski, Martin and Schein, Jacqueline and Birol, Inan\c{c} and Connors, Joseph and Gascoyne, Randy and Horsman, Doug and Jones, Steven J and Marra, Marco A}, +doi = {10.1101/gr.092759.109}, +issn = {1549-5469}, +journal = {Genome research}, +keywords = {Animals,Chromosome Mapping,Chromosomes, Artificial, Bacterial,Chromosomes, Human, Pair 17,Chromosomes, Human, Pair 17: genetics,Chromosomes, Human, Pair 6,Chromosomes, Human, Pair 6: genetics,Contig Mapping,Dogs,Gene Dosage,Gene Dosage: genetics,Genome,Genome: genetics,Genomics,Humans,Lymphoma, Follicular,Lymphoma, Follicular: genetics,Software}, +month = sep, +number = {9}, +pages = {1639--45}, +pmid = {19541911}, +title = {{Circos: an information aesthetic for comparative genomics.}}, +url = {http://genome.cshlp.org/content/early/2009/06/15/gr.092759.109.abstract}, +volume = {19}, +year = {2009} +} + + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed crt.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crt.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,90 @@ + + + + jjkoehorst/sappdocker:CRT + + java -jar /crt/CRT-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-input' '$input' -output '$output' -format TURTLE + + + + + + + + + + CRISPR prediction using CRT. Requires a converted + FASTA/EMBL/GenBank file. + + + + @article{Bland2007, + abstract = {BACKGROUND: + Clustered Regularly Interspaced Palindromic Repeats + (CRISPRs) are a + novel type of direct repeat found in a wide range of + bacteria and + archaea. 
CRISPRs are beginning to attract attention + because of their + proposed mechanism; that is, defending their hosts + against invading + extrachromosomal elements such as viruses. Existing + repeat detection + tools do a poor job of identifying CRISPRs due to + the presence of + unique spacer sequences separating the repeats. In + this study, a new + tool, CRT, is introduced that rapidly and + accurately identifies + CRISPRs in large DNA strings, such as genomes + and metagenomes. + RESULTS: CRT was compared to CRISPR detection tools, + Patscan and + Pilercr. In terms of correctness, CRT was shown to be + very reliable, + demonstrating significant improvements over Patscan + for measures + precision, recall and quality. When compared to Pilercr, + CRT showed + improved performance for recall and quality. In terms of + speed, CRT + proved to be a huge improvement over Patscan. Both CRT and + Pilercr + were comparable in speed, however CRT was faster for genomes + containing large numbers of repeats. CONCLUSION: In this paper a new + tool was introduced for the automatic detection of CRISPR elements. + This tool, CRT, showed some important improvements over current + techniques for CRISPR identification. CRT's approach to detecting + repetitive sequences is straightforward. It uses a simple sequential + scan of a DNA sequence and detects repeats directly without any major + conversion or preprocessing of the input. This leads to a program + that is easy to describe and understand; yet it is very accurate, + fast and memory efficient, being O(n) in space and O(nm/l) in time.}, + author = {Bland, Charles and Ramsey, Teresa L and Sabree, Fareedah + and Lowe, Micheal and Brown, Kyndall and Kyrpides, Nikos C and + Hugenholtz, Philip}, + doi = {10.1186/1471-2105-8-209}, + file = + {:Users/koeho006/Library/Application Support/Mendeley + Desktop/Downloaded/Bland et al. 
- 2007 - CRISPR recognition tool + (CRT) a tool for automatic detection of clustered regularly + interspaced palindromic repeat.pdf:pdf}, + isbn = {1471-2105 + (Electronic)$\backslash$n1471-2105 (Linking)}, + issn = {14712105}, + journal = {BMC bioinformatics}, + mendeley-groups = {VAPP Application + note}, + pages = {209}, + pmid = {17577412}, + title = {{CRISPR recognition + tool (CRT): a tool for automatic detection of + clustered regularly + interspaced palindromic repeats.}}, + volume = {8}, + year = {2007} + } + + + diff -r f2cbf1230026 -r fa736576c7ed enzdp.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/enzdp.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,16 @@ + + + + jjkoehorst/sappdocker:ENZDP + + java -jar /enzdp/enzdpRDF-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input $input -output $output -format TURTLE + + + + + + + + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed fasta2rdf.xml --- a/fasta2rdf.xml Wed Jun 29 02:21:47 2016 -0400 +++ b/fasta2rdf.xml Mon Jul 04 10:37:59 2016 -0400 @@ -1,10 +1,9 @@ - SAPP - FASTA 2 RDF conversion + jjkoehorst/sappdocker:FASTA2RDF - - java -jar /fasta2rdf/target/FASTA2RDF-0.1-jar-with-dependencies.jar + java -jar /fasta2rdf/FASTA2RDF-0.1-jar-with-dependencies.jar '--type' '$source.fastaType' '--ignorestop' '$IgnoreStopCodon' '--input' '$input' '--output' '$output' '-organism' '$organism' '--ncbi_taxid' '$ncbi_taxid' @@ -31,7 +30,8 @@ - + diff -r f2cbf1230026 -r fa736576c7ed gbk2rdf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gbk2rdf.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,24 @@ + + Converts GenBank/EMBL files to RDF + + jjkoehorst/sappdocker:GBK2RDF + + java -jar /genbank2rdf/GenBank2RDF-0.0.1-SNAPSHOT-jar-with-dependencies.jar '-input' '$input' -output '$output' -source "$source" -format "$format" -identifier "${input.name}" -codon "$codon" + + + + + + + + + + + + + + + + Java Genbank or EMBL to RDF conversion + + diff -r f2cbf1230026 -r fa736576c7ed genecaller.xml --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genecaller.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,103 @@ + + + + jjkoehorst/sappdocker:GENECALLER + + java -jar /genecaller/genecaller-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-runtype' '$runtype' -input $input -output $output -codon $codon -format TURTLE + + + + + + + + + + + + + + + + Prodigal gene prediction requires an RDF file from either a + Genome FASTA or + Genbank/EMBL format. + + + @article{Hyatt2010, + abstract = {BACKGROUND: The + quality of automated gene prediction in microbial + organisms has + improved steadily over the past decade, but there is + still room for + improvement. Increasing the number of correct + identifications, both of + genes and of the translation initiation + sites for each gene, and + reducing the overall number of false + positives, are all desirable + goals. + + RESULTS: With our years of experience in manually curating + genomes for the + Joint Genome Institute, we developed a new gene + prediction algorithm + called Prodigal (PROkaryotic DYnamic programming + Gene-finding + ALgorithm). With Prodigal, we focused specifically on the + three goals + of improved gene structure prediction, improved + translation + initiation site recognition, and reduced false positives. + We compared + the results of Prodigal to existing gene-finding methods + to + demonstrate that it met each of these objectives. + + CONCLUSION: We + built a fast, lightweight, open source gene prediction program + called + Prodigal http://compbio.ornl.gov/prodigal/. Prodigal achieved + good + results compared to existing methods, and we believe it will be + a + valuable asset to automated microbial annotation pipelines.}, + author = + {Hyatt, Doug and Chen, Gwo-Liang and Locascio, Philip F and + Land, + Miriam L and Larimer, Frank W and Hauser, Loren J}, + doi = + {10.1186/1471-2105-11-119}, + file = + {:Users/koeho006/Library/Application Support/Mendeley + Desktop/Downloaded/Hyatt et al. 
- 2010 - Prodigal prokaryotic gene + recognition and translation initiation site identification.pdf:pdf}, + issn = {1471-2105}, + journal = {BMC bioinformatics}, + keywords = + {Algorithms,Databases, Genetic,Genome, Bacterial,Peptide Chain + Initiation, Translational,Peptide Chain Initiation, Translational: + genetics,Prokaryotic Cells,Software}, + mendeley-groups = {Dump/VAPP + Paper}, + month = jan, + number = {1}, + pages = {119}, + pmid = {20211023}, + title = {{Prodigal: prokaryotic gene recognition and translation + initiation site identification.}}, + url = + {http://www.biomedcentral.com/1471-2105/11/119}, + volume = {11}, + year = + {2010} + } + + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed genomeInformation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genomeInformation.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,16 @@ + + Information overview + + jjkoehorst/sappdocker:GENOMEINFORMATION + + java -jar /genomeinformation/GenomeInformation-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input $input -output '$output' -format 'TURTLE' + + + + + + + + Genome(s) information overview + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed interproscan.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,148 @@ + + Interproscan annotation for SAPP + + jjkoehorst/sappdocker:INTERPROSCAN + + java -jar /interproscan/interproscanRDF-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-input' '$input' '-format' 'TURTLE' + '-applications' '$appl' + '-output' + '$outfile' -v '$version' '$disable' + + + + + + + + + + + + + + + + + + + + + + + + + + Interproscan annotation suite. Select your RDF genome with + protein annotation. + This can be either from a converted GenBank/EMBL + file or from a + Prodigal prediction. + The output will be an RDF file with + protein domain annotation from + InterPro. 
+ + + @article{Mitchell26112014, + author = {Mitchell, + Alex and Chang, Hsin-Yu and Daugherty, Louise and + Fraser, Matthew and + Hunter, Sarah and Lopez, Rodrigo and McAnulla, + Craig and McMenamin, + Conor and Nuka, Gift and Pesseat, Sebastien and + Sangrador-Vegas, Amaia + and Scheremetjew, Maxim and Rato, Claudia and + Yong, Siew-Yit and + Bateman, Alex and Punta, Marco and Attwood, Teresa + K. and Sigrist, + Christian J.A. and Redaschi, Nicole and Rivoire, + Catherine and + Xenarios, Ioannis and Kahn, Daniel and Guyot, Dominique + and Bork, Peer + and Letunic, Ivica and Gough, Julian and Oates, Matt + and Haft, Daniel + and Huang, Hongzhan and Natale, Darren A. and Wu, + Cathy H. and Orengo, + Christine and Sillitoe, Ian and Mi, Huaiyu and + Thomas, Paul D. and + Finn, Robert D.}, + title = {The InterPro protein families database: the + classification + resource after 15 years}, + year = {2014}, + doi = + {10.1093/nar/gku1243}, + abstract ={The InterPro database + (http://www.ebi.ac.uk/interpro/) is a freely + available resource that + can be used to classify sequences into + protein families and to predict + the presence of important domains and + sites. Central to the InterPro + database are predictive models, known + as signatures, from a range of + different protein family databases + that have different biological + focuses and use different + methodological approaches to classify + protein families and domains. + InterPro integrates these signatures, + capitalizing on the respective + strengths of the individual databases, + to produce a powerful protein + classification resource. Here, we report + on the status of InterPro as + it enters its 15th year of operation, and + give an overview of new + developments with the database and its + associated Web interfaces and + software. In particular, the new domain + architecture search tool is + described and the process of mapping of + Gene Ontology terms to + InterPro is outlined. 
We also discuss the + challenges faced by the + resource given the explosive growth in + sequence data in recent years. + InterPro (version 48.0) contains 36 766 + member database signatures + integrated into 26 238 InterPro entries, an + increase of over 3993 + entries (5081 signatures), since 2012.}, + URL = + {http://nar.oxfordjournals.org/content/early/2014/11/26/nar.gku1243.abstract}, + eprint = + {http://nar.oxfordjournals.org/content/early/2014/11/26/nar.gku1243.full.pdf+html}, + journal = {Nucleic Acids Research} + } + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed ipath.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ipath.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,39 @@ + + iPath Generator + + jjkoehorst/sappdocker:IPATH + + java -jar /ipath/iPath-0.0.1-SNAPSHOT-jar-with-dependencies.jar + #if $conditional.source_select=="single" + -group1 '$conditional.input' + #else + -group1 $conditional.input1 + -group2 $conditional.input2 + #end if + -o "$outfile" -format TURTLE + + + + + + + + + + + + + + + + + + + + + GROUP 1: #FF0000 +GROUP 2: #0000FF +BOTH: #00FF00 + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed loader.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/loader.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,17 @@ + + + + jjkoehorst/sappdocker:LOADER + + java -jar /loader/Loader-0.0.1-SNAPSHOT-jar-with-dependencies.jar + -input '$input' -endpoint "$endpoint" -format "application/x-turtle" + + + + + + + + RDF Loader into a remote SPARQL end point + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed locustagger.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/locustagger.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,18 @@ + + Locus tagger inference from original annotation + + jjkoehorst/sappdocker:LOCUSTAGGER + + java -jar /locustagger/LocusTagger-0.1-jar-with-dependencies.jar + '-input' '$input' -format 'TURTLE' -output '$output' -prefix '$prefix' + + + + + + + + Locus 
tag inference from original genbank/embl that was converted + to RDF + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed matrix.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matrix.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,63 @@ + + + + jjkoehorst/sappdocker:MATRIX + + java -jar /sparql/sparqljava-0.0.1-SNAPSHOT-jar-with-dependencies.jar '$separate' '-rdf' '$input' '-format' 'TURTLE' '-query' '$query' '-output' '$output' && Rscript $__tool_directory__/matrix.R '$output' '$output' + + + + + + + + + The creation of a matrix from a created SPARQL query. One should use a query that creates 3 columns for the X and Y coordinates and Z for the value. + + A header for a SPARQL query would look like SELECT ?genome ?protein ?value or SELECT ?genome ?domain (COUNT(?domain) AS ?domainC) + +----------------------------- +Genome Interpro Matrix +----------------------------- +The following query results in a matrix of genomes by Pfam accessions :: + + PREFIX biopax:<http://www.biopax.org/release/bp-level3.owl#> + PREFIX ssb:<http://csb.wur.nl/genome/> + SELECT DISTINCT ?genome ?id (COUNT(?id) AS ?value) + WHERE { + ?genome a ssb:Genome . + ?genome ssb:dnaobject ?dna . + ?dna ssb:feature ?feature . + ?feature ssb:tool ?tool . + ?feature ssb:protein ?protein . + ?protein ssb:feature ?domain . + ?domain ssb:signature ?signature . + ?signature biopax:xref ?xref . + ?xref biopax:db 'pfam' . + ?xref biopax:id ?id . + } GROUP BY ?genome ?id + +------------------- +Enzyme based matrix +------------------- + +The following query results in a matrix of genomes by EC numbers :: + + PREFIX ssb:<http://csb.wur.nl/genome/> + SELECT ?genome ?ec (COUNT(?ec) AS ?ecCount) + WHERE { + ?gene a ssb:Cds . + ?gene ssb:locus_tag ?locus . + ?gene ssb:source ?source . + ?gene ssb:protein ?protein . + ?protein ssb:feature ?feature . + { + ?feature ssb:kegg ?ec . + } UNION { + ?feature ssb:ec_number ?ec . 
+ } + } GROUP BY ?genome ?ec + +**If you require specific questions related to the database that you created feel free to contact us.** + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed merger.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merger.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,17 @@ + + + + jjkoehorst/sappdocker:MERGER + + java -jar /merger/rdfMerge-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input $input -output '$output' -format TURTLE + + + + + + + + Merges multiple genome TURTLE runs into a single end... + Handy when running multiple parallel analysis and results can be merged into a single TURTLE RDF file... + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed pathwayAnalysis.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pathwayAnalysis.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,12 @@ + + + java -jar /pathwayanalysis/pathwayAnalysis-0.0.1-SNAPSHOT-jar-with-dependencies.jar -input $input -output $output -format TURTLE + + + + + + + Pathway overview information for MetaCyc and KEGG + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed phylogeny.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phylogeny.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,16 @@ + + + + jjkoehorst/sappdocker:PHYLOGENY + + java -jar /phylogeny/phylogeny-0.0.1-SNAPSHOT-jar-with-dependencies.jar + -input $input -output $output -format TURTLE + + + + + + + Phylogeny analysis based on protein signatures + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed priam.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/priam.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,55 @@ + + + + jjkoehorst/sappdocker:PRIAM + + java -jar /priam/priam-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-input' '$input' -format TURTLE -output '$output' + + + + + + + + EC detection using PRIAM. An RDF file with protein prediction is + required. Either from Genbank/EMBL or from Prodigal gene prediction + module. 
+ + + @article{Claudel-Renard2003, + abstract = {The + advent of fully sequenced genomes opens the ground for the + reconstruction of metabolic pathways on the basis of the + identification of enzyme-coding genes. Here we describe PRIAM, a + method for automated enzyme detection in a fully sequenced genome, + based on the classification of enzymes in the ENZYME database. PRIAM + relies on sets of position-specific scoring matrices ('profiles') + automatically tailored for each ENZYME entry. Automatically generated + logical rules define which of these profiles is required in order to + infer the presence of the corresponding enzyme in an organism. As an + example, PRIAM was applied to identify potential metabolic pathways + from the complete genome of the nitrogen-fixing bacterium + Sinorhizobium meliloti. The results of this automated method were + compared with the original genome annotation and visualised on KEGG + graphs in order to facilitate the interpretation of metabolic + pathways and to highlight potentially missing enzymes.}, + author = + {Claudel-Renard, Clotilde and Chevalet, Claude and Faraut, Thomas and Kahn, Daniel}, + doi = {10.1093/nar/gkg847}, + issn = {1362-4962}, + journal = {Nucleic Acids Research}, + month = nov, + number = {22}, + pages = + {6633--6639}, + title = {{Enzyme-specific profiles for genome + annotation: PRIAM}}, + url = + {http://nar.oxfordjournals.org/content/31/22/6633.abstract?etoc}, + volume = {31}, + year = {2003} + } + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed rdf2embl.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rdf2embl.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,70 @@ + + + + jjkoehorst/sappdocker:RDF2EMBL + + java -jar /rdf2embl/rdf2embl-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-input' '$input' -output '$output' -format 'TURTLE' '-organism' + '$organism' '-strain' '$strain' '-substrain' '$substrain' '-keywords' + '$keywords' '-taxon' '$taxon' -codon '$codon' + -locus '$prefix' '-title' + '$title' '-authors' '$authors' '-consortium' 
'$consortium' '-journal' + '$journal' '-dataclass' '$dataclass' '-writer' '$writer' '-projectid' + '$projectid' '$pathwaytools' '-note' '$note' '-scaffold' '$scaffold' + '$gapprotein' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RDF to EMBL conversion. Locus tags are automatically generated unless locus tags have been inferred or generated through the locus module. + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed rnammer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rnammer.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,87 @@ + + + + jjkoehorst/sappdocker:RNAMMER + + java -jar /rnammer/rnammer-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-input' '$input' -output '$output' -format TURTLE + + + + + + + + Be aware that this can only be used for academic users; other + users are + requested to contact CBS Software Package Manager at + software@cbs.dtu.dk. + We are investigating alternative prediction + applications, please contact + us if you are aware of such method. + + + @article{Lagesen2007, + abstract = {The + publication of a complete genome sequence is usually + accompanied by + annotations of its genes. In contrast to protein + coding genes, genes + for ribosomal RNA (rRNA) are often poorly or + inconsistently annotated. + This makes comparative studies based on + rRNA genes difficult. We have + therefore created computational + predictors for the major rRNA species + from all kingdoms of life and + compiled them into a program called + RNAmmer. The program uses hidden + Markov models trained on data from + the 5S ribosomal RNA database and + the European ribosomal RNA database + project. A pre-screening step + makes the method fast with little loss + of sensitivity, enabling the + analysis of a complete bacterial genome + in less than a minute. 
+ Results from running RNAmmer on a large set of + genomes indicate that + the location of rRNAs can be predicted with a + very high level of + accuracy. Novel, unannotated rRNAs are also + predicted in many + genomes. The software as well as the genome analysis + results are + available at the CBS web server.}, + author = {Lagesen, Karin + and Hallin, Peter and R\o dland, Einar Andreas and + Staerfeldt, + Hans-Henrik and Rognes, Torbj\o rn and Ussery, David W}, + doi = + {10.1093/nar/gkm160}, + file = {:Users/koeho006/Library/Application + Support/Mendeley + Desktop/Downloaded/Lagesen et al. - 2007 - RNAmmer + consistent and + rapid annotation of ribosomal RNA genes.pdf:pdf}, + issn = + {1362-4962}, + journal = {Nucleic acids research}, + keywords = + {Computational Biology,Computational Biology: methods,Genes, + rRNA,Genome, Bacterial,Genomics,Genomics: methods,Markov + Chains,Software}, + mendeley-groups = {Dump/VAPP Paper,VAPP Application + note}, + month = jan, + number = {9}, + pages = {3100--8}, + pmid = {17452365}, + title = {{RNAmmer: consistent and rapid annotation of ribosomal RNA + genes.}}, + volume = {35}, + year = {2007} + } + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed sappDocker/fasta2rdf.xml --- a/sappDocker/fasta2rdf.xml Wed Jun 29 02:21:47 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ - - - - jjkoehorst/sappdocker:FASTA2RDF - - java -jar /fasta2rdf/target/FASTA2RDF-0.1-jar-with-dependencies.jar - '--type' '$source.fastaType' '--ignorestop' '$IgnoreStopCodon' - '--input' '$input' '--output' '$output' '-organism' '$organism' - '--ncbi_taxid' '$ncbi_taxid' - #if len(str($identification_tag))==0 - '--idtag' ${input.name} - #else - '--idtag' '$identification_tag' - #end if - --source SAPP - - #for $index, $id in enumerate( $ids ) - '--id_alternative' '$id.id_tag' - #end for - '--id_alternative' '$input.name' - '--codon' '$table' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - RDF creation from a multi (gene/protein/genome) fasta file - - - diff -r f2cbf1230026 -r fa736576c7ed signalp.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/signalp.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,59 @@ + + + + jjkoehorst/sappdocker:SIGNALP + + java -jar /signalp/signalp-0.0.1-SNAPSHOT-jar-with-dependencies.jar + '-signaltype' '$runtype' -input $input -output $output -format TURTLE + + + + + + + + + + + + + Be aware that this can only be used for academic users; other + users are + requested to contact CBS Software Package Manager at + software@cbs.dtu.dk. + We are investigating alternative prediction + applications, please contact + us if you are aware of such method. + + + @article{Petersen2011, + author = {Petersen, + Thomas Nordahl and Brunak, S\o ren and von Heijne, + Gunnar and Nielsen, + Henrik}, + doi = {10.1038/nmeth.1701}, + issn = {1548-7105}, + journal = + {Nature methods}, + keywords = {Algorithms,Cell Membrane,Cell Membrane: + metabolism,Computational + Biology,Protein Sorting Signals,Software}, + mendeley-groups = {Dump/VAPP Paper}, + month = jan, + number = {10}, + pages = + {785--6}, + pmid = {21959131}, + publisher = {Nature Publishing Group}, + title = {{SignalP 4.0: discriminating signal peptides from + transmembrane + regions.}}, + url = + {http://www.ncbi.nlm.nih.gov/pubmed/21959131}, + volume = {8}, + year = + {2011} + } + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed swisscog.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/swisscog.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,17 @@ + + + + jjkoehorst/sappdocker:SWISSCOG + + java -jar /swisscog/SwissCog-0.0.1-SNAPSHOT-jar-with-dependencies.jar + -input $input -output $output -format TURTLE + + + + + + + + + + \ No newline at end of file diff -r f2cbf1230026 -r fa736576c7ed tmhmm.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tmhmm.xml Mon Jul 04 10:37:59 2016 -0400 @@ -0,0 +1,92 @@ + + + + 
jjkoehorst/sappdocker:TMHMM + + java -jar /tmhmm/tmhmm-0.0.1-SNAPSHOT-jar-with-dependencies.jar + -input $input -output $output -format TURTLE + + + + + + + + Be aware that this can only be used for academic users; other + users are + requested to contact CBS Software Package Manager at + software@cbs.dtu.dk. + We are investigating alternative prediction + applications, please contact + us if you are aware of such method. + + + @article{Krogh2001, + abstract = {We describe and + validate a new membrane protein topology + prediction method, TMHMM, + based on a hidden Markov model. We present + a detailed analysis of + TMHMM's performance, and show that it + correctly predicts 97-98 \% of + the transmembrane helices. + Additionally, TMHMM can discriminate + between soluble and membrane + proteins with both specificity and + sensitivity better than 99 \%, + although the accuracy drops when signal + peptides are present. This + high degree of accuracy allowed us to + predict reliably integral + membrane proteins in a large collection of + genomes. Based on these + predictions, we estimate that 20-30 \% of all + genes in most genomes + encode membrane proteins, which is in agreement + with previous + estimates. We further discovered that proteins with + N(in)-C(in) + topologies are strongly preferred in all examined + organisms, except + Caenorhabditis elegans, where the large number of + 7TM receptors + increases the counts for N(out)-C(in) topologies. We + discuss the + possible relevance of this finding for our understanding + of membrane + protein assembly mechanisms. 
A TMHMM prediction service is + available + at http://www.cbs.dtu.dk/services/TMHMM/.}, + author = {Krogh, + A and Larsson, B and von Heijne, G and Sonnhammer, E L}, + doi = + {10.1006/jmbi.2000.4315}, + issn = {0022-2836}, + journal = {Journal of + molecular biology}, + keywords = {Animals,Bacterial Proteins,Bacterial + Proteins: + chemistry,Computational Biology,Computational Biology: + methods,Databases as Topic,Fungal Proteins,Fungal Proteins: + chemistry,Genome,Internet,Markov Chains,Membrane Proteins,Membrane + Proteins: chemistry,Plant Proteins,Plant Proteins: + chemistry,Porins,Porins: chemistry,Protein Sorting Signals,Protein + Structure, Secondary,Reproducibility of Results,Research + Design,Sensitivity and Specificity,Software,Solubility}, + month = jan, + number = {3}, + pages = {567--80}, + pmid = {11152613}, + title = {{Predicting + transmembrane protein topology with a hidden Markov + model: application + to complete genomes.}}, + url = + {http://www.sciencedirect.com/science/article/pii/S0022283600943158}, + volume = {305}, + year = {2001} + } + + + +