comparison alphafold.xml @ 9:3bd420ec162d draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 7726c3cba165bdc8fc6366ec0ce6596e55657468
author galaxy-australia
date Tue, 13 Sep 2022 22:04:12 +0000
parents ca90d17ff51b
children 072c324f20fc
comparison
equal deleted inserted replaced
8:ca90d17ff51b 9:3bd420ec162d
1 <tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> 1 <tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description> - AI-guided 3D structural prediction of proteins</description> 2 <description> - AI-guided 3D structural prediction of proteins</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">2.1.2</token> 4 <token name="@TOOL_VERSION@">2.1.2</token>
5 <token name="@VERSION_SUFFIX@">1</token> 5 <token name="@VERSION_SUFFIX@">2</token>
6 </macros> 6 </macros>
7 <edam_topics> 7 <edam_topics>
8 <edam_topic>topic_0082</edam_topic> 8 <edam_topic>topic_0082</edam_topic>
9 </edam_topics> 9 </edam_topics>
10 <edam_operations> 10 <edam_operations>
30 #end if 30 #end if
31 31
32 python3 '$__tool_directory__/validate_fasta.py' input.fasta 32 python3 '$__tool_directory__/validate_fasta.py' input.fasta
33 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0} 33 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0}
34 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0} 34 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0}
35 #if $multimer:
36 --multimer
37 #end if
35 > alphafold.fasta && 38 > alphafold.fasta &&
36 39
37 ## env vars ------------------------------- 40 ## env vars -------------------------------
38 export TF_FORCE_UNIFIED_MEMORY=1 && 41 export TF_FORCE_UNIFIED_MEMORY=1 &&
39 export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 && 42 export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 &&
44 --fasta_paths alphafold.fasta 47 --fasta_paths alphafold.fasta
45 --output_dir output 48 --output_dir output
46 --data_dir \${ALPHAFOLD_DB:-/data} 49 --data_dir \${ALPHAFOLD_DB:-/data}
47 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta 50 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta
48 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa 51 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa
49 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70 52 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files
50 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files
51 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat 53 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat
52 --max_template_date=\$DATE 54 --max_template_date=\$DATE
53 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt 55 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
54 --uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 56 --uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08
57
55 ## Param introduced in AlphaFold v2.1.2: 58 ## Param introduced in AlphaFold v2.1.2:
56 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} 59 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True}
60
61 #if $multimer:
62 --model_preset=multimer
63 --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/pdb_seqres/pdb_seqres.txt
64 --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/uniprot/uniprot.fasta
65 ##--num_multimer_predictions_per_model=1 ## introduced alphafold>=2.2.0
66
67 #else
68 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70
69 #end if
57 && 70 &&
58 71
59 ## Generate additional outputs ------------ 72 ## Generate additional outputs ------------
60 python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts && 73 python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts
74 #if $multimer:
75 --multimer
76 #end if
77 &&
61 78
62 ## HTML output 79 ## HTML output
63 mkdir -p '${ html.files_path }' && 80 mkdir -p '${ html.files_path }' &&
64 cp '$__tool_directory__/alphafold.html' '${html}' && 81 cp '$__tool_directory__/alphafold.html' '${html}' &&
65 cp output/alphafold/ranked_*.pdb '${html.files_path}' && 82 cp output/alphafold/ranked_*.pdb '${html.files_path}' &&
66 83
67 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers. 84 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers.
68 ## The working directory ends up two levels deep and the visualization html page 85 ## The working directory ends up two levels deep and the visualization html page
69 ## fails to load the PDB files as static assets. 86 ## fails to load the PDB files as static assets.
70 [ -d working ] && cp -r working/* . 87 (([ -d working ] && cp -r working/* .) || true)
71 88
72 ]]></command> 89 ]]></command>
73 <inputs> 90 <inputs>
74 <conditional name="fasta_or_text"> 91 <conditional name="fasta_or_text">
75 <param name="input_mode" type="select" label="Fasta Input" help="Single protein sequence to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. Provide only 1 sequence per job."> 92 <param name="input_mode" type="select" label="Fasta Input" help="Protein sequence(s) to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. If multiple sequences FASTA file provided, multimer mode must be selected.">
76 <option value="history">Use fasta from history</option> 93 <option value="history">Use fasta from history</option>
77 <option value="textbox">Paste sequence into textbox</option> 94 <option value="textbox">Paste sequence into textbox</option>
78 </param> 95 </param>
79 <when value="history"> 96 <when value="history">
80 <param name="fasta_file" type="data" format="fasta" label="Fasta file from history" help="Select single fasta protein sequence from your history. If you wish to fold multiple proteins, submit an individual job for each protein." /> 97 <param name="fasta_file" type="data" multiple="false" format="fasta" label="Fasta file from history" help="Select single FASTA protein file from your history. If you wish to fold multiple proteins, submit an individual job for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in this file." />
81 </when> 98 </when>
82 <when value="textbox"> 99 <when value="textbox">
83 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein." /> 100 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." />
84 </when> 101 </when>
85 </conditional> 102 </conditional>
103
104 <param
105 name="multimer"
106 type="boolean"
107 checked="false"
108 label="Multimer mode"
109 help="Fold a protein multimer from multiple input sequences. You must input multiple sequences to run this mode."
110 />
111
86 <param name="output_plddts" type="boolean" checked="false" label="Output per-residue confidence scores" truevalue="--plddts" falsevalue="" help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. Sections of low confidence often occur in disordered regions. " /> 112 <param name="output_plddts" type="boolean" checked="false" label="Output per-residue confidence scores" truevalue="--plddts" falsevalue="" help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. Sections of low confidence often occur in disordered regions. " />
87 </inputs> 113 </inputs>
114
88 <outputs> 115 <outputs>
89 <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: Model 5"/> 116 <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: Model 5"/>
90 <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: Model 4"/> 117 <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: Model 4"/>
91 <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: Model 3"/> 118 <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: Model 3"/>
92 <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: Model 2"/> 119 <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: Model 2"/>
159 186
160 .. class:: infomark 187 .. class:: infomark
161 188
162 **What it does** 189 **What it does**
163 190
164 | AlphaFold v2.0: AI-guided 3D structure prediction of proteins 191 | AlphaFold v2.1: AI-guided 3D structure prediction of proteins
165 | 192 |
166 193
167 *What is AlphaFold?* 194 *What is AlphaFold?*
168 195
169 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence (in Fasta format), then will 'fold' that sequence into a 3D model. 196 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence (in Fasta format), then will 'fold' that sequence into a 3D model.
170 | NOTE: AlphaFold has a number of versions - this tool uses AlphaFold v2.0. 197 |
198 | **NOTE: AlphaFold has numerous versions - this tool uses AlphaFold v2.1.2.**
171 | 199 |
172 200
173 *What makes AlphaFold different?* 201 *What makes AlphaFold different?*
174 202
175 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally. 203 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally.
177 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known. 205 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known.
178 | 206 |
179 207
180 *Downstream analysis* 208 *Downstream analysis*
181 209
182 | Obtaining a protein fold is the first step in many analyses. 210 | Obtaining a protein structure prediction is the first step in many analyses.
183 | The 3D models created by AlphaFold can be used in downstream analysis, including the following: 211 | The 3D models created by AlphaFold can be used in downstream analysis, including the following:
184 | 212 |
185 213
186 - Inspecting protein features 214 - Inspecting protein features
187 3D viewers (pymol, chimera, ngl, blender) can be used to inspect active sites, regulatory domains, binding sites. 215 3D viewers (pymol, chimera, ngl, blender) can be used to inspect active sites, regulatory domains, binding sites.
190 This is especially useful in screening drug candidates. 218 This is especially useful in screening drug candidates.
191 - Protein-protein interactions 219 - Protein-protein interactions
192 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation. 220 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation.
193 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_. 221 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_.
194 222
223 | Protein complex interactions are also commonly observed with AlphaFold's multimer prediction mode.
224 |
225 |
226
195 **Input** 227 **Input**
196 228
197 *Amino acid sequence* 229 *Amino acid sequence*
198 230
199 | AlphaFold accepts a **single amino acid sequence** in FASTA format. 231 | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format.
200 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box. 232 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box.
201 | Please paste only a single sequence - we can only process a single sequence per job. 233 | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer.
202 | Multiple sequences will return an error. 234 |
203 | 235 |
204 236
205 **Outputs** 237 **Outputs**
206 238
207 *Visualization* 239 *Visualization*
208 240
209 | An interactive 3D graphic of the best predicted molecular structures. 241 An interactive 3D graphic of the best predicted molecular structures.
210 | This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from. 242 This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from.
211 | Open the "Visualization" history output by clicking on the "view data" icon: 243 Open the "Visualization" history output by clicking on the "view data" icon:
212 |
213 244
214 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true 245 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true
215 :height: 520 246 :height: 520
216 :alt: Result visualization 247 :alt: Result visualization
217 248
223 | These files describe the molecular structures and can be used for downstream analysis. e.g. *in silico* molecular docking. 254 | These files describe the molecular structures and can be used for downstream analysis. e.g. *in silico* molecular docking.
224 | 255 |
225 256
226 *Model confidence scores (optional)* 257 *Model confidence scores (optional)*
227 258
228 | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_) which may be useful for downstream analysis. 259 | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_, or the ``iptm+ptm`` score if run in multimer mode) which may be useful for downstream analysis.
229 | Model confidence scores are also included as a column in the default PDB output. 260 | Model confidence scores are also included as a column (replacing ``bFactor``) in the default PDB output.
261 |
262 |
263
264 **AlphaFold configuration**
265
266 | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_.
267 | This means that it runs against the full database with Amber relaxation, with ``max_template_date`` set to today's date. If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_.
268 |
230 | 269 |
231 270
232 **External Resources** 271 **External Resources**
233 272
234 We recommend checking out the 273 We HIGHLY recommend checking out the
235 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_, 274 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_,
236 which contains predicted sequences for thousands of Human proteins. See also: 275 which contains pre-computed structures for over 200 million known proteins.
276 See also:
237 277
238 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_ 278 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_
239 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_ 279 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_
240 280
241 ]]></help> 281 ]]></help>
242 <citations> 282 <citations>
243 <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation> 283 <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation>
284 <citation type="doi">https://doi.org/10.1101/2021.10.04.463034</citation>
244 </citations> 285 </citations>
245 </tool> 286 </tool>