annotate interproscan5/ipsfaux.xml @ 0:0da2847fc108 draft default tip

Uploaded
author si-datascience
date Thu, 24 May 2018 14:57:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
1 <tool id="ipsfaux" name="Faux Interproscan" version="1.0.2a">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
2 <description>Interproscan functional predictions of ORFs (faux). Input file and computation options are
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
3 ignored (other than the types of outputs). The same precomputed results are produced every time.</description>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
4 <requirements>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
5 <!--
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
6 <requirement type="package">signalp</requirement>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
7 <requirement type="package">phobius</requirement>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
8 <requirement type="package">tmhmm</requirement>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
9 -->
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
10 <requirement type="set_environment">INTERPROSCAN_SCRIPT_PATH</requirement>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
11 </requirements>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
12
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
13 <command>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
14 #import os
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
15 echo "Start timestamp: \$(date)";
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
16 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/fake_ips.py
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
17 ## disables the precalculated lookup service, all calculation will be run locally
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
18 -dp
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
19 --input $infile
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
20 --seqtype $seqtype
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
21 -f tsv,$output_types
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
22 --applications $appl
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
23
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
24 $pathways
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
25 $goterms
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
26 $iprlookup
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
27 $mode
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
28 --output-file-base __base__
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
29 2>&#38;1;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
30
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
31 mv __base__.tsv $tsv_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
32
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
33 #if 'gff3' in str($output_types):
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
34 mv __base__.gff3 $gff3_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
35 #end if
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
36
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
37 #if 'xml' in str($output_types):
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
38 mv __base__.xml $xml_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
39 #end if
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
40
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
41 #if 'html' in str($output_types):
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
42 mkdir -p $html_file.files_path;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
43 #set temp_archive_file = '__base__.html.tar.gz'
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
44 tar -C $html_file.files_path -xvmzf $temp_archive_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
45 #if str($seqtype) == 'p' and not str($getorfed):
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
46 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 0 $tsv_file $html_file $html_file.files_path;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
47 #else
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
48 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 1 $tsv_file $html_file $html_file.files_path;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
49 #end if
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
50 rm $temp_archive_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
51 #end if
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
52
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
53 #if 'svg' in str($output_types):
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
54 mkdir -p $svg_file.files_path;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
55 #set temp_archive_file = '__base__.svg.tar.gz'
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
56 tar -C $svg_file.files_path -xvmzf $temp_archive_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
57 #if str($seqtype) == 'p' and not str($getorfed):
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
58 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 0 $tsv_file $svg_file $svg_file.files_path;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
59 #else
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
60 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 1 $tsv_file $svg_file $svg_file.files_path;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
61 #end if
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
62 rm $temp_archive_file;
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
63 #end if
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
64
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
65 echo "End timestamp: \$(date)"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
66 </command>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
67
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
68 <inputs>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
69 <param name="infile" type="data" format="fasta" label="Sequence Fasta File (ignored -- faux mode)"/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
70
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
71 <param name="seqtype" type="select" label="Type of the input sequences" help="">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
72 <option value="p" selected="true">Protein</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
73 <option value="n">DNA / RNA</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
74 </param>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
75
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
76 <param name="getorfed" type="boolean" label="DNA/RNA was externally processed by getorf?" help="Ignored for DNA inputs."
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
77 truevalue="getorfed"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
78 falsevalue=""/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
79
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
80 <param name="mode" type="boolean" label="Run on cluster?" help="Check to submit job to cluster."
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
81 truevalue="--mode=cluster --clusterrunid=gtdi-ips-analysis"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
82 falsevalue=""/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
83
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
84 <param name="appl" type="select" multiple="True" display="checkboxes" label="Applications to run"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
85 help="Select the applications to run.">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
86 <option value="TIGRFAM"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
87 selected="true">TIGRFAM: protein families based on Hidden Markov Models or HMMs</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
88 <option value="PIRSF" selected="true">PIRSF: non-overlapping clustering of UniProtKB sequences into a hierarchical order (evolutionary relationships)</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
89 <option value="ProDom"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
90 selected="true">ProDom: set of protein domain families generated from the UniProtKB</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
91 <option value="Panther"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
92 selected="true">Panther: Protein ANalysis THrough Evolutionary Relationships</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
93 <option value="SMART" selected="true">SMART: identification and analysis of domain architectures based on Hidden Markov Models or HMMs</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
94 <option value="PrositeProfiles" selected="true">PROSITE Profiles: protein domains, families and functional sites as well as associated profiles to identify them</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
95 <option value="PrositePatterns" selected="true">PROSITE Pattern: protein domains, families and functional sites as well as associated patterns to identify them</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
96 <option value="HAMAP"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
97 selected="true">HAMAP: High-quality Automated Annotation of Microbial Proteomes</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
98 <option value="PfamA" selected="true">PfamA: protein families, each represented by multiple sequence alignments and hidden Markov models</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
99 <option value="PRINTS" selected="true">PRINTS: group of conserved motifs (fingerprints) used to characterise a protein family</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
100 <option value="SuperFamily"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
101 selected="true">SUPERFAMILY: database of structural and functional annotation</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
102 <option value="Coils" selected="true">Coils: Prediction of Coiled Coil Regions in Proteins</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
103 <option value="Gene3d" selected="true">Gene3d: Structural assignment for whole genes and genomes using the CATH domain structure database</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
104 <option value="SignalP-GRAM_POSITIVE" selected="false">SignalP Gram Positive Bacteria</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
105 <option value="SignalP-GRAM_NEGATIVE" selected="false">SignalP Gram Negative Bacteria</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
106 <option value="SignalP-EUK" selected="false">SignalP Eukaryotes</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
107 <option value="Phobius" selected="false">Phobius: combined transmembrane topology and signal peptide predictor</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
108 <option value="TMHMM" selected="false">TMHMM: Prediction of transmembrane helices in proteins</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
109 </param>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
110
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
111 <param name="pathways" truevalue="--pathways" falsevalue="" checked="True" type="boolean"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
112 label="Include pathway information"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
113 help="Option that provides mappings from matches to pathway information, which is based on the matched manually curated InterPro entries. (--pathways)"/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
114 <param name="goterms" truevalue="--goterms" falsevalue="" checked="True" type="boolean"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
115 label="Include Gene Ontology (GO) mappings"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
116 help="Look up of corresponding Gene Ontology annotation. Implies -iprlookup option. (--goterms)"/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
117
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
118 <param name="iprlookup" truevalue="--iprlookup" falsevalue="" checked="False" type="boolean"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
119 label="Provide additional mappings"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
120 help="Provide mappings from matched member database signatures to the InterPro entries that they are integrated into (--iprlookup)"/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
121
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
122 <param name="output_types" type="select" display="checkboxes" multiple="true" label="Output formats (in addition to TSV)"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
123 help="Select the output formats to generate (at least one)">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
124 <option value="html" selected="true">HTML (Graphical)</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
125 <option value="svg" selected="false">SVG (Scalable Vector Graphics)</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
126 <option value="gff3" selected="true">GFF3</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
127 <option value="xml" selected="false">XML</option>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
128 </param>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
129 </inputs>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
130
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
131 <outputs>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
132 <data format="tabular" name="tsv_file" label="Interproscan TSV on ${on_string}"/>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
133 <data format="html" name="html_file" label="Interproscan HTML on ${on_string}">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
134 <filter>'html' in output_types</filter>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
135 </data>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
136 <data format="html" name="svg_file" label="Interproscan SVG on ${on_string}">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
137 <filter>'svg' in output_types</filter>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
138 </data>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
139 <data format="gff3" name="gff3_file" label="Interproscan GFF3 on ${on_string}">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
140 <filter>'gff3' in output_types</filter>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
141 </data>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
142 <data format="xml" name="xml_file" label="Interproscan XML on ${on_string}">
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
143 <filter>'xml' in output_types</filter>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
144 </data>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
145 </outputs>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
146
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
147 <requirements>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
148 </requirements>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
149
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
150 <help>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
151
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
152 **What it does**
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
153
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
154 Interproscan is a batch tool to query the Interpro database. It provides annotations based on multiple searches of profile and other functional databases.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
155
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
156
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
157 #####
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
158 Input
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
159 #####
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
160
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
161 Required is a FASTA file containing protein or nucleotide sequences.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
162
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
163
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
164 ######
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
165 Output
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
166 ######
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
167
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
168 In this version of InterProScan_, you can retrieve output in any of the following five formats:
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
169
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
170 * TSV: a simple tab-delimited file format
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
171 * XML: the new "IMPACT" XML format (XSD available here_).
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
172 * GFF: The `GFF 3.0`_ format
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
173 * HTML: An HTML representation of the protein matches
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
174 * SVG: A Scalable Vector Graphics representation of the protein matches
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
175
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
176
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
177 .. _`GFF 3.0`: http://gmod.org/wiki/GFF#GFF3_Format
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
178 .. _here: http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
179
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
180
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
181
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
182 Tab-separated values format (TSV)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
183 =================================
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
184
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
185 Basic tab delimited format.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
186
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
187
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
188 Example Output
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
189 --------------
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
190
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
191 ::
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
192
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
193 P51587 14086411a2cdf1c4cba63020e1622579 3418 Pfam PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 2670 2799 7.9E-43 T 15-03-2013
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
194 P51587 14086411a2cdf1c4cba63020e1622579 3418 ProSiteProfiles PS50138 BRCA2 repeat profile. 1002 1036 0.0 T 18-03-2013 IPR002093 BRCA2 repeat GO:0005515|GO:0006302
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
195 P51587 14086411a2cdf1c4cba63020e1622579 3418 Gene3D G3DSA:2.40.50.140 2966 3051 3.1E-52 T 15-03-2013
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
196 ...
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
197
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
198
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
199 The TSV format presents the match data in columns as follows:
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
200
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
201 - Protein Accession (e.g. P51587)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
202 - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
203 - Sequence Length (e.g. 3418)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
204 - Analysis (e.g. Pfam / PRINTS / Gene3D)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
205 - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
206 - Signature Description (e.g. BRCA2 repeat profile)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
207 - Start location
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
208 - Stop location
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
209 - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
210 - Status - is the status of the match (T: true)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
211 - Date - is the date of the run
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
212 - (InterProScan_ annotations - accession (e.g. IPR002093) - optional column; only displayed if --iprlookup option is switched on)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
213 - (InterProScan_ annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if --iprlookup option is switched on)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
214 - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
215 - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
216
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
217
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
218 Extensible Markup Language (XML)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
219 ================================
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
220
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
221 XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available here_.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
222
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
223 Example Output
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
224 --------------
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
225
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
226 .. image:: $PATH_TO_IMAGES/example_xml_output.png
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
227
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
228
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
229
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
230 Generic Feature Format Version 3 (GFF3)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
231 =======================================
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
232
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
233 The GFF3 format is a flat tab-delimited file, which is much richer than the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. Documentation of all the columns and attributes used is available at http://www.sequenceontology.org/gff3.shtml.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
234
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
235 Example Output
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
236 --------------
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
237
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
238 ::
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
239
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
240 ##gff-version 3
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
241 ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
242 ##sequence-region AACH01000027 1 1347
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
243 ##seqid|source|type|start|end|score|strand|phase|attributes
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
244 AACH01000027 provided_by_user nucleic_acid 1 1347 . + . Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
245 AACH01000027 getorf ORF 1 1347 . + . Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
246 AACH01000027 getorf polypeptide 1 449 . + . md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
247 AACH01000027 Pfam protein_match 84 314 1.2E-45 + . Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13"
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
248 ##sequence-region 2
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
249 ...
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
250 >pep_AACH01000027_1_1347
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
251 LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
252 LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
253 GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
254 LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
255 ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
256 TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
257 DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
258 RSQKAKGVLIYRDDWISITPEIQLLFTEF
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
259 ...
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
260 >match$8_84_314
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
261 KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
262 RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
263 LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
264 AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
265
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
266
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
267 Scalable Vector Graphics (SVG) and HyperText Markup Language (HTML)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
268 ====================================================================
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
269
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
270 InterProScan_ 5 outputs a single HTML/SVG file for each protein sequence analysed.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
271
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
272
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
273 Example Output
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
274 --------------
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
275
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
276 .. image:: $PATH_TO_IMAGES/P51587.svg.png
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
277
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
278 .. _InterProScan: http://www.ebi.ac.uk/interpro
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
279
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
280
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
281 ----------
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
282 References
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
283 ----------
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
284
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
285
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
286 If you use this Galaxy tool in work leading to a scientific publication please
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
287 cite the following papers:
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
288
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
289 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
290 Galaxy tools and workflows for sequence analysis with applications
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
291 in molecular plant pathology. PeerJ 1:e167
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
292 http://dx.doi.org/10.7717/peerj.167
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
293
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
294 Zdobnov EM, Apweiler R (2001)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
295 InterProScan - an integration platform for the signature-recognition methods in InterPro.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
296 Bioinformatics 17, 847-848.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
297 http://dx.doi.org/10.1093/bioinformatics/17.9.847
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
298
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
299 Quevillon E, Silventoinen V, Pillai S, Harte N, Mulder N, Apweiler R, Lopez R (2005)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
300 InterProScan: protein domains identifier.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
301 Nucleic Acids Research 33 (Web Server issue), W116-W120.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
302 http://dx.doi.org/10.1093/nar/gki442
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
303
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
304 Hunter S, Apweiler R, Attwood TK, Bairoch A, Bateman A, Binns D, Bork P, Das U, Daugherty L, Duquenne L, Finn RD, Gough J, Haft D, Hulo N, Kahn D, Kelly E, Laugraud A, Letunic I, Lonsdale D, Lopez R, Madera M, Maslen J, McAnulla C, McDowall J, Mistry J, Mitchell A, Mulder N, Natale D, Orengo C, Quinn AF, Selengut JD, Sigrist CJ, Thimma M, Thomas PD, Valentin F, Wilson D, Wu CH, Yeats C. (2009)
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
305 InterPro: the integrative protein signature database.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
306 Nucleic Acids Research 37 (Database Issue), D224-228.
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
307 http://dx.doi.org/10.1093/nar/gkn785
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
308
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
309
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
310 This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
311 http://toolshed.g2.bx.psu.edu/view/bgruening/interproscan5
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
312
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
313
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
314 **Galaxy Wrapper Author**::
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
315
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
316 * Bjoern Gruening, University of Freiburg
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
317 * Konrad Paszkiewicz, University of Exeter
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
318
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
319 </help>
0da2847fc108 Uploaded
si-datascience
parents:
diff changeset
320 </tool>