annotate interproscan5/ipsfaux.xml @ 2:d1d34608dba0 draft

Uploaded
author mkh
date Fri, 29 Jan 2016 18:41:29 -0500
parents e2d4343d73ad
children 74cf3f49f10c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
e2d4343d73ad Minor fixes.
mkh
parents: 0
diff changeset
1 <tool id="ipsfaux" name="Faux Interproscan" version="1.0.0a">
0
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
2 <description>Interproscan functional predictions of ORFs (faux). Input file and computation options are
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
3 ignored (other than the types of outputs). The same precomputed results are produced every time.</description>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
4 <requirements>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
5 <!--
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
6 <requirement type="package">signalp</requirement>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
7 <requirement type="package">phobius</requirement>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
8 <requirement type="package">tmhmm</requirement>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
9 -->
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
10 <requirement type="set_environment">INTERPROSCAN_SCRIPT_PATH</requirement>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
11 </requirements>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
12
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
13 <command>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
14 #import os
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
15 echo "output_types=$output_types";
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
16
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
17 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/fake_ips.py
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
18 ## disables the precalculated lookup service, all calculation will be run locally
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
19 -dp
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
20 --input $infile
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
21 --seqtype $seqtype
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
22 -f $output_types
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
23 --applications $appl
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
24 --tempdir \$TEMP
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
25
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
26 $pathways
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
27 $goterms
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
28 $iprlookup
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
29 $mode
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
30 --output-file-base __base__
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
31 2>&#38;1;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
32
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
33 #if 'tsv' in str($output_types):
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
34 mv __base__.tsv $tsv_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
35 #end if
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
36
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
37 #if 'gff3' in str($output_types):
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
38 mv __base__.gff3 $gff3_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
39 #end if
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
40
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
41 #if 'xml' in str($output_types):
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
42 mv __base__.xml $xml_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
43 #end if
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
44
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
45 #if 'html' in str($output_types):
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
46 mkdir -p $html_file.files_path;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
47 #set temp_archive_file = '__base__.html.tar.gz'
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
48 tar -C $html_file.files_path -xvmzf $temp_archive_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
49 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_index.py $html_file $html_file.files_path;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
50 rm $temp_archive_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
51 #end if
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
52
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
53 #if 'svg' in str($output_types):
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
54 mkdir -p $svg_file.files_path;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
55 #set temp_archive_file = '__base__.svg.tar.gz'
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
56 tar -C $svg_file.files_path -xvmzf $temp_archive_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
57 python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_index.py $svg_file $svg_file.files_path;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
58 rm $temp_archive_file;
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
59 #end if
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
60
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
61 echo "End of ipsfaux"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
62 </command>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
63
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
64 <inputs>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
65 <param name="infile" type="data" format="fasta" label="Sequence Fasta File (ignored -- faux mode)"/>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
66
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
67 <param name="seqtype" type="select" label="Type of the input sequences" help="">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
68 <option value="p" selected="true">Protein</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
69 <option value="n">DNA / RNA</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
70 </param>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
71
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
72 <param name="mode" type="boolean" label="Run on cluster?" help="Check to submit job to cluster."
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
73 truevalue="--mode=cluster --clusterrunid=gtdi-ips-analysis"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
74 falsevalue=""/>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
75
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
76 <param name="appl" type="select" multiple="True" display="checkboxes" label="Applications to run"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
77 help="Select your program.">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
78 <option value="TIGRFAM"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
79 selected="true">TIGRFAM: protein families based on Hidden Markov Models or HMMs</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
80 <option value="PIRSF" selected="true">PIRSF: non-overlapping clustering of UniProtKB sequences into a hierarchical order (evolutionary relationships)</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
81 <option value="ProDom"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
82 selected="true">ProDom: set of protein domain families generated from the UniProtKB</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
83 <option value="Panther"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
84 selected="true">Panther: Protein ANalysis THrough Evolutionary Relationships</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
85 <option value="SMART" selected="true">SMART: identification and analysis of domain architectures based on Hidden Markov Models or HMMs</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
86 <option value="PrositeProfiles" selected="true">PROSITE Profiles: protein domains, families and functional sites as well as associated profiles to identify them</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
87 <option value="PrositePatterns" selected="true">PROSITE Pattern: protein domains, families and functional sites as well as associated patterns to identify them</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
88 <option value="HAMAP"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
89 selected="true">HAMAP: High-quality Automated Annotation of Microbial Proteomes</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
90 <option value="PfamA" selected="true">PfamA: protein families, each represented by multiple sequence alignments and hidden Markov models</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
91 <option value="PRINTS" selected="true">PRINTS: group of conserved motifs (fingerprints) used to characterise a protein family</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
92 <option value="SuperFamily"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
93 selected="true">SUPERFAMILY: database of structural and functional annotation</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
94 <option value="Coils" selected="true">Coils: Prediction of Coiled Coil Regions in Proteins</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
95 <option value="Gene3d" selected="true">Gene3d: Structural assignment for whole genes and genomes using the CATH domain structure database</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
96 <option value="SignalP-GRAM_POSITIVE" selected="false">SignalP Gram Positive Bacteria</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
97 <option value="SignalP-GRAM_NEGATIVE" selected="false">SignalP Gram Negative Bacteria</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
98 <option value="SignalP-EUK" selected="false">SignalP Eukaryotes</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
99 <option value="Phobius" selected="false">Phobius: combined transmembrane topology and signal peptide predictor</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
100 <option value="TMHMM" selected="false">TMHMM: Prediction of transmembrane helices in proteins</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
101 </param>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
102
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
103 <param name="pathways" truevalue="--pathways" falsevalue="" checked="True" type="boolean"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
104 label="Include pathway information"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
105 help="Option that provides mappings from matches to pathway information, which is based on the matched manually curated InterPro entries. (--pathways)"/>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
106 <param name="goterms" truevalue="--goterms" falsevalue="" checked="True" type="boolean"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
107 label="Include Gene Ontology (GO) mappings"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
108 help="Look up of corresponding Gene Ontology annotation. Implies -iprlookup option. (--goterms)"/>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
109
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
110 <param name="iprlookup" truevalue="--iprlookup" falsevalue="" checked="False" type="boolean"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
111 label="Provide additional mappings"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
112 help="Provide mappings from matched member database signatures to the InterPro entries that they are integrated into (--iprlookup)"/>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
113
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
114 <param name="output_types" type="select" display="checkboxes" multiple="true" label="Output formats"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
115 help="Select the output formats to generate (at least one)">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
116 <option value="tsv" selected="true">Raw (TSV)</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
117 <option value="html" selected="true">HTML (Graphical)</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
118 <option value="svg" selected="false">SVG (Scalable Vector Graphics)</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
119 <option value="gff3" selected="true">GFF3</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
120 <option value="xml" selected="false">XML</option>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
121 </param>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
122 </inputs>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
123
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
124 <outputs>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
125 <data format="tabular" name="tsv_file" label="Interproscan TSV on ${on_string}">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
126 <filter>'tsv' in output_types</filter>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
127 </data>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
128 <data format="html" name="html_file" label="Interproscan HTML on ${on_string}">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
129 <filter>'html' in output_types</filter>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
130 </data>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
131 <data format="html" name="svg_file" label="Interproscan SVG on ${on_string}">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
132 <filter>'svg' in output_types</filter>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
133 </data>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
134 <data format="gff3" name="gff3_file" label="Interproscan GFF3 on ${on_string}">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
135 <filter>'gff3' in output_types</filter>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
136 </data>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
137 <data format="xml" name="xml_file" label="Interproscan XML on ${on_string}">
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
138 <filter>'xml' in output_types</filter>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
139 </data>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
140 </outputs>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
141
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
142 <requirements>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
143 </requirements>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
144
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
145 <help>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
146
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
147 **What it does**
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
148
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
149 Interproscan is a batch tool to query the Interpro database. It provides annotations based on multiple searches of profile and other functional databases.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
150
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
151
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
152 #####
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
153 Input
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
154 #####
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
155
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
156 Required is a FASTA file containing protein or nucleotide sequences.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
157
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
158
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
159 ######
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
160 Output
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
161 ######
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
162
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
163 In this version of InterProScan_, you can retrieve output in any of the following five formats:
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
164
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
165 * TSV: a simple tab-delimited file format
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
166 * XML: the new "IMPACT" XML format (XSD available here_).
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
167 * GFF: The `GFF 3.0`_ format
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
168 * HTML: An HTML representation of the protein matches
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
169 * SVG: A Scalable Vector Graphics representation of the protein matches
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
170
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
171
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
172 .. _`GFF 3.0`: http://gmod.org/wiki/GFF#GFF3_Format
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
173 .. _here: http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
174
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
175
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
176
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
177 Tab-separated values format (TSV)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
178 =================================
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
179
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
180 Basic tab delimited format.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
181
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
182
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
183 Example Output
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
184 --------------
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
185
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
186 ::
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
187
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
188 P51587 14086411a2cdf1c4cba63020e1622579 3418 Pfam PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 2670 2799 7.9E-43 T 15-03-2013
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
189 P51587 14086411a2cdf1c4cba63020e1622579 3418 ProSiteProfiles PS50138 BRCA2 repeat profile. 1002 1036 0.0 T 18-03-2013 IPR002093 BRCA2 repeat GO:0005515|GO:0006302
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
190 P51587 14086411a2cdf1c4cba63020e1622579 3418 Gene3D G3DSA:2.40.50.140 2966 3051 3.1E-52 T 15-03-2013
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
191 ...
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
192
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
193
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
194 The TSV format presents the match data in columns as follows:
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
195
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
196 - Protein Accession (e.g. P51587)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
197 - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
198 - Sequence Length (e.g. 3418)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
199 - Analysis (e.g. Pfam / PRINTS / Gene3D)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
200 - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
201 - Signature Description (e.g. BRCA2 repeat profile)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
202 - Start location
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
203 - Stop location
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
204 - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
205 - Status - is the status of the match (T: true)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
206 - Date - is the date of the run
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
207 - (InterProScan_ annotations - accession (e.g. IPR002093) - optional column; only displayed if --iprlookup option is switched on)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
208 - (InterProScan_ annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if --iprlookup option is switched on)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
209 - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
210 - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
211
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
212
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
213 Extensible Markup Language (XML)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
214 ================================
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
215
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
216 XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available here_.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
217
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
218 Example Output
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
219 --------------
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
220
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
221 .. image:: $PATH_TO_IMAGES/example_xml_output.png
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
222
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
223
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
224
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
225 Generic Feature Format Version 3 (GFF3)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
226 =======================================
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
227
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
228 The GFF3 format is a flat tab-delimited file, which is much richer than the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. You will find documentation of all the columns and attributes used at http://www.sequenceontology.org/gff3.shtml.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
229
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
230 Example Output
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
231 --------------
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
232
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
233 ::
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
234
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
235 ##gff-version 3
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
236 ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
237 ##sequence-region AACH01000027 1 1347
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
238 ##seqid|source|type|start|end|score|strand|phase|attributes
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
239 AACH01000027 provided_by_user nucleic_acid 1 1347 . + . Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
240 AACH01000027 getorf ORF 1 1347 . + . Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
241 AACH01000027 getorf polypeptide 1 449 . + . md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
242 AACH01000027 Pfam protein_match 84 314 1.2E-45 + . Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13"
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
243 ##sequence-region 2
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
244 ...
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
245 >pep_AACH01000027_1_1347
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
246 LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
247 LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
248 GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
249 LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
250 ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
251 TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
252 DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
253 RSQKAKGVLIYRDDWISITPEIQLLFTEF
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
254 ...
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
255 >match$8_84_314
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
256 KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
257 RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
258 LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
259 AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
260
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
261
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
262 Scalable Vector Graphics (SVG) and HyperText Markup Language (HTML)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
263 ====================================================================
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
264
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
265 InterProScan_ 5 outputs a single HTML/SVG file for each protein sequence analysed.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
266
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
267
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
268 Example Output
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
269 --------------
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
270
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
271 .. image:: $PATH_TO_IMAGES/P51587.svg.png
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
272
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
273 .. _InterProScan: http://www.ebi.ac.uk/interpro
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
274
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
275
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
276 ----------
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
277 References
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
278 ----------
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
279
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
280
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
281 If you use this Galaxy tool in work leading to a scientific publication please
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
282 cite the following papers:
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
283
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
284 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
285 Galaxy tools and workflows for sequence analysis with applications
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
286 in molecular plant pathology. PeerJ 1:e167
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
287 http://dx.doi.org/10.7717/peerj.167
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
288
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
289 Zdobnov EM, Apweiler R (2001)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
290 InterProScan an integration platform for the signature-recognition methods in InterPro.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
291 Bioinformatics 17, 847-848.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
292 http://dx.doi.org/10.1093/bioinformatics/17.9.847
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
293
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
294 Quevillon E, Silventoinen V, Pillai S, Harte N, Mulder N, Apweiler R, Lopez R (2005)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
295 InterProScan: protein domains identifier.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
296 Nucleic Acids Research 33 (Web Server issue), W116-W120.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
297 http://dx.doi.org/10.1093/nar/gki442
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
298
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
299 Hunter S, Apweiler R, Attwood TK, Bairoch A, Bateman A, Binns D, Bork P, Das U, Daugherty L, Duquenne L, Finn RD, Gough J, Haft D, Hulo N, Kahn D, Kelly E, Laugraud A, Letunic I, Lonsdale D, Lopez R, Madera M, Maslen J, McAnulla C, McDowall J, Mistry J, Mitchell A, Mulder N, Natale D, Orengo C, Quinn AF, Selengut JD, Sigrist CJ, Thimma M, Thomas PD, Valentin F, Wilson D, Wu CH, Yeats C. (2009)
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
300 InterPro: the integrative protein signature database.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
301 Nucleic Acids Research 37 (Database Issue), D224-228.
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
302 http://dx.doi.org/10.1093/nar/gkn785
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
303
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
304
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
305 This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
306 http://toolshed.g2.bx.psu.edu/view/bgruening/interproscan5
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
307
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
308
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
309 **Galaxy Wrapper Author**::
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
310
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
311 * Bjoern Gruening, University of Freiburg
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
312 * Konrad Paszkiewicz, University of Exeter
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
313
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
314 </help>
fe8e43a26870 Uploaded
mkh
parents:
diff changeset
315 </tool>