comparison alphafold.xml @ 14:d00e15139065 draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit d490defa32d9c318137d2d781243b392cb14110d-dirty
author galaxy-australia
date Tue, 28 Feb 2023 01:15:42 +0000
parents c0e71cb2bd1b
children a58f7eb0df2c
comparison
equal deleted inserted replaced
13:c0e71cb2bd1b 14:d00e15139065
1 <tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> 1 <tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description> - AI-guided 3D structural prediction of proteins</description> 2 <description> - AI-guided 3D structural prediction of proteins</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">2.1.2</token> 4 <token name="@TOOL_VERSION@">2.3.1</token>
5 <token name="@VERSION_SUFFIX@">4</token> 5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros> 6 </macros>
7 <edam_topics> 7 <edam_topics>
8 <edam_topic>topic_0082</edam_topic> 8 <edam_topic>topic_0082</edam_topic>
9 </edam_topics> 9 </edam_topics>
10 <edam_operations> 10 <edam_operations>
11 <edam_operation>operation_0474</edam_operation> 11 <edam_operation>operation_0474</edam_operation>
12 </edam_operations> 12 </edam_operations>
13 <xrefs> 13 <xrefs>
14 <xref type="bio.tools">alphafold_2.0</xref> 14 <xref type="bio.tools">alphafold_2</xref>
15 </xrefs> 15 </xrefs>
16 <requirements> 16 <requirements>
17 <container type="docker">neoformit/alphafold:v2.1.2_0</container> 17 <container type="docker">neoformit/alphafold:v2.3.1_1</container>
18 </requirements> 18 </requirements>
19 <command detect_errors="exit_code"><![CDATA[ 19 <command detect_errors="exit_code"><![CDATA[
20 20
21 ## $ALPHAFOLD_DB variable should point to the location of the AlphaFold 21 ## $ALPHAFOLD_DB variable should point to the location of the AlphaFold
22 ## databases - defaults to /data 22 ## databases - defaults to /data
23 23
24 ## fasta setup ---------------------------- 24 ## Read FASTA input ----------------------------
25 #if $fasta_or_text.input_mode == 'history': 25 #if $fasta_or_text.input_mode == 'history':
26 cp '$fasta_or_text.fasta_file' input.fasta && 26 cp '$fasta_or_text.fasta_file' input.fasta
27 27
28 #elif $fasta_or_text.input_mode == 'textbox': 28 #elif $fasta_or_text.input_mode == 'textbox':
29 echo '$fasta_or_text.fasta_text' > input.fasta && 29 echo '$fasta_or_text.fasta_text' > input.fasta
30 #end if 30 #end if
31 31
32 python3 '$__tool_directory__/validate_fasta.py' input.fasta 32 && python3 '$__tool_directory__/validate_fasta.py' input.fasta
33 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0} 33 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0}
34 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0} 34 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0}
35 #if $multimer: 35 #if $multimer:
36 --multimer 36 --multimer
37 #end if 37 #end if
38 > alphafold.fasta && 38 > alphafold.fasta
39 39
40 ## env vars ------------------------------- 40 ## Env vars -------------------------------
41 export TF_FORCE_UNIFIED_MEMORY=1 && 41 && export TF_FORCE_UNIFIED_MEMORY=1
42 export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 && 42 && export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0
43 export DATE=`date +"%Y-%m-%d"` && 43 && export TODAY=`date +"%Y-%m-%d"`
44 44
45 ## run alphafold ------------------------- 45 ## Run alphafold -------------------------
46 python /app/alphafold/run_alphafold.py 46 && python /app/alphafold/run_alphafold.py
47 --fasta_paths alphafold.fasta 47 --fasta_paths alphafold.fasta
48 --output_dir output 48 --output_dir output
49 --data_dir \${ALPHAFOLD_DB:-/data} 49 --data_dir \${ALPHAFOLD_DB:-/data}
50 --max_template_date=\$DATE 50
51 51 ## Set reference database paths
52 ## Set reference data explicitly 52 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta
53 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta 53 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2022_05.fa
54 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa 54 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files
55 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files 55 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat
56 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat 56 #if $dbs == 'full':
57 #if $dbs == 'full': 57 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
58 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt 58 --uniref30_database_path \${ALPHAFOLD_DB:-/data}/uniref30/UniRef30_2021_03
59 --uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 59 #else
60 #else 60 --db_preset=reduced_dbs
61 --db_preset=reduced_dbs 61 --small_bfd_database_path \${ALPHAFOLD_DB:-/data}/small_bfd/bfd-first_non_consensus_sequences.fasta
62 --small_bfd_database_path \${ALPHAFOLD_DB:-/data}/small_bfd/bfd-first_non_consensus_sequences.fasta 62 #end if
63 #end if 63
64 64 #if $max_template_date:
65 ## Param introduced in AlphaFold v2.1.2: 65 --max_template_date=$max_template_date
66 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} 66 #else
67 67 --max_template_date=\$TODAY
68 #if $multimer: 68 #end if
69 --model_preset=multimer 69
70 --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/pdb_seqres/pdb_seqres.txt 70 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} ## introduced in v2.1.2
71 --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/uniprot/uniprot.fasta 71
72 ##--num_multimer_predictions_per_model=1 ## introduced alphafold>=2.2.0 72 #if $multimer:
73 73 --model_preset=multimer
74 #else 74 --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/pdb_seqres/pdb_seqres.txt
75 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70 75 --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/uniprot/uniprot.fasta
76 #end if 76 --num_multimer_predictions_per_model=1 ## introduced in v2.2.0
77 && 77 #else
78 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70
79 #end if
78 80
79 ## Generate additional outputs ------------ 81 ## Generate additional outputs ------------
80 python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts 82 && python3 '$__tool_directory__/outputs.py' output/alphafold $outputs.plddts
81 #if $multimer: 83 #if $multimer:
82 --multimer 84 --multimer
83 #end if 85 #end if
84 &&
85 86
86 ## HTML output 87 ## HTML output
87 mkdir -p '${ html.files_path }' && 88 && mkdir -p '${ html.files_path }'
88 cp '$__tool_directory__/alphafold.html' '${html}' && 89 && cp '$__tool_directory__/alphafold.html' '${html}'
89 cp output/alphafold/ranked_*.pdb '${html.files_path}' && 90 && cp output/alphafold/ranked_*.pdb '${html.files_path}'
90 91
91 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers. 92 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers.
92 ## The working directory ends up two levels deep and the visualization html page 93 ## The working directory ends up two levels deep and the visualization html page
93 ## fails to load the PDB files as static assets. 94 ## fails to load the PDB files as static assets.
94 (([ -d working ] && cp -r working/* .) || true) 95 && (([ -d working ] && cp -r working/* .) || true)
95 96
96 ]]></command> 97 ]]></command>
97 <inputs> 98 <inputs>
98 <conditional name="fasta_or_text"> 99 <conditional name="fasta_or_text">
99 <param name="input_mode" type="select" label="Fasta Input" help="Protein sequence(s) to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. If multiple sequences FASTA file provided, multimer mode must be selected."> 100 <param name="input_mode" type="select" label="Fasta Input" help="Protein sequence(s) to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. If multiple-sequence FASTA file provided, multimer mode must be selected.">
100 <option value="history">Use fasta from history</option> 101 <option value="history">Use fasta from history</option>
101 <option value="textbox">Paste sequence into textbox</option> 102 <option value="textbox">Paste sequence into textbox</option>
102 </param> 103 </param>
103 <when value="history"> 104 <when value="history">
104 <param name="fasta_file" type="data" multiple="false" format="fasta" label="Fasta file from history" help="Select single FASTA protein file from your history. If you wish to fold multiple proteins, submit an individual job for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in this file." /> 105 <param name="fasta_file" type="data" multiple="false" format="fasta" label="Fasta file from history" help="Select single FASTA protein file from your history. If you wish to fold multiple proteins, submit an individual job for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in this file." />
107 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." /> 108 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." />
108 </when> 109 </when>
109 </conditional> 110 </conditional>
110 111
111 <param 112 <param
113 name="max_template_date"
114 type="text"
115 label="Max template date (yyyy-mm-dd) (optional)"
116 help="The model will reference PDB structures deposited before this date only. Defaults to today's date."
117 optional="true"
118 >
119 <sanitizer>
120 <valid initial="string.digits">
121 <add value="-" />
122 </valid>
123 </sanitizer>
124 <validator type="regex">[0-9]{4}-[0-9]{2}-[0-9]{2}</validator>
125 </param>
126
127 <param
112 name="dbs" 128 name="dbs"
113 type="select" 129 type="select"
114 display="radio" 130 display="radio"
115 label="Select database" 131 label="Select database"
116 help="The reduced database allows significantly faster run time in 132 help="The reduced database allows significantly faster run time in
123 <param 139 <param
124 name="multimer" 140 name="multimer"
125 type="boolean" 141 type="boolean"
126 checked="false" 142 checked="false"
127 label="Multimer mode" 143 label="Multimer mode"
128 help="Fold a protein multimer from multiple input sequences. You must input multiple sequences to run this mode." 144 help="Fold a protein multimer from multiple input sequences. You must input multiple sequences in FASTA to run this mode."
129 /> 145 />
130 146
131 <param name="output_plddts" type="boolean" checked="false" label="Output per-residue confidence scores" truevalue="--plddts" falsevalue="" help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. Sections of low confidence often occur in disordered regions. " /> 147 <section name="outputs" title="Optional outputs" expanded="false">
148 <param
149 name="confidence_scores"
150 type="boolean"
151 checked="false"
152 label="Per-model confidence scores"
153 help="A tabular file showing average confidence score for each model (predicted template modelling (PTM) score; interface PTM is incorporated into this score for multimer predictions)."
154 />
155 <param
156 name="plddts"
157 type="boolean"
158 checked="false"
159 label="Per-residue confidence scores"
160 truevalue="--plddts"
161 falsevalue=""
162 help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. This output is a tabular file with five rows (one for each output PDB model), with each column providing a pLDDT score for a single residue. These data have been parsed from the model pickle files (below)."
163 />
164 <param
165 name="model_pkls"
166 type="boolean"
167 checked="false"
168 label="ranked_*.pkl"
169 help="A pickle file containing metrics used for the assessment of the model's accuracy. These include per-residue pLDDT scores (see above), predicted TM (Template Modelling) score, which is a global superposition metric and predicted aligned error (a matrix size (number of residues) x (number of residues) where each position describes the confidence of the residue's 3D position relative to another residue in the model; can be used for the interpretation of relative positions of domains). Pickle files can be read and processed using the Python 'pickle' library. Outputs are named respectively to PDB outputs."
170 />
171 <param
172 name="relax_json"
173 type="boolean"
174 checked="false"
175 label="relax_metrics.json"
176 help="A JSON-formatted text file containing relax metrics (mostly remaining violations)."
177 />
178 </section>
132 </inputs> 179 </inputs>
133 180
134 <outputs> 181 <outputs>
135 <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: Model 5"/> 182 <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: PDB ranked 4"/>
136 <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: Model 4"/> 183 <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: PDB ranked 3"/>
137 <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: Model 3"/> 184 <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: PDB ranked 2"/>
138 <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: Model 2"/> 185 <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: PDB ranked 1"/>
139 <data name="model1" format="pdb" from_work_dir="output/alphafold/ranked_0.pdb" label="${tool.name} on ${on_string}: Model 1"/> 186 <data name="model1" format="pdb" from_work_dir="output/alphafold/ranked_0.pdb" label="${tool.name} on ${on_string}: PDB ranked 0"/>
140 <data name="confidence_scores" format="tsv" from_work_dir="output/alphafold/model_confidence_scores.tsv" label="${tool.name} on ${on_string}: Model confidence scores"/>
141 <data name="plddts" format="tsv" from_work_dir="output/alphafold/plddts.tsv" label="${tool.name} on ${on_string}: Per-residue confidence scores (plddts)">
142 <filter>(output_plddts)</filter>
143 </data>
144 <data name="html" format="html" label="${tool.name} on ${on_string}: Visualization" /> 187 <data name="html" format="html" label="${tool.name} on ${on_string}: Visualization" />
188
189 <!-- Optional outputs -->
190 <data
191 name="output_confidence_scores"
192 format="tabular"
193 from_work_dir="output/alphafold/extra/model_confidence_scores.tsv"
194 label="${tool.name} on ${on_string}: Model confidence scores"
195 >
196 <filter>outputs['confidence_scores']</filter>
197 </data>
198
199 <data
200 name="output_plddts"
201 format="tabular"
202 from_work_dir="output/alphafold/extra/plddts.tsv"
203 label="${tool.name} on ${on_string}: Per-residue confidence scores (plddts)"
204 >
205 <filter>outputs['plddts']</filter>
206 </data>
207
208 <data
209 name="output_ranked_4_pkl"
210 format="binary"
211 from_work_dir="output/alphafold/extra/ranked_4.pkl"
212 label="${tool.name} on ${on_string}: ranked_4.pkl"
213 >
214 <filter>outputs['model_pkls']</filter>
215 </data>
216 <data
217 name="output_ranked_3_pkl"
218 format="binary"
219 from_work_dir="output/alphafold/extra/ranked_3.pkl"
220 label="${tool.name} on ${on_string}: ranked_3.pkl"
221 >
222 <filter>outputs['model_pkls']</filter>
223 </data>
224 <data
225 name="output_ranked_2_pkl"
226 format="binary"
227 from_work_dir="output/alphafold/extra/ranked_2.pkl"
228 label="${tool.name} on ${on_string}: ranked_2.pkl"
229 >
230 <filter>outputs['model_pkls']</filter>
231 </data>
232 <data
233 name="output_ranked_1_pkl"
234 format="binary"
235 from_work_dir="output/alphafold/extra/ranked_1.pkl"
236 label="${tool.name} on ${on_string}: ranked_1.pkl"
237 >
238 <filter>outputs['model_pkls']</filter>
239 </data>
240 <data
241 name="output_ranked_0_pkl"
242 format="binary"
243 from_work_dir="output/alphafold/extra/ranked_0.pkl"
244 label="${tool.name} on ${on_string}: ranked_0.pkl"
245 >
246 <filter>outputs['model_pkls']</filter>
247 </data>
248 <data
249 name="output_relax_json"
250 format="json"
251 from_work_dir="output/alphafold/extra/relax_metrics_ranked.json"
252 label="${tool.name} on ${on_string}: relax_metrics_ranked.json"
253 >
254 <filter>outputs['relax_json']</filter>
255 </data>
145 </outputs> 256 </outputs>
257
146 <tests> 258 <tests>
147 <test expect_num_outputs="8"> 259 <test expect_num_outputs="8">
148 <conditional name="fasta_or_text"> 260 <conditional name="fasta_or_text">
149 <param name="input_mode" value="history"/> 261 <param name="input_mode" value="history"/>
150 <param name="fasta_file" value="test1.fasta"/> 262 <param name="fasta_file" value="test1.fasta"/>
151 </conditional> 263 </conditional>
152 <param name="output_plddts" value="true"/> 264 <param name="plddts" value="true"/>
153 <output name="plddts"> 265 <output name="output_plddts">
154 <assert_contents> 266 <assert_contents>
155 <has_n_columns n="2"/> 267 <has_n_columns n="2"/>
156 <has_n_lines n="6"/> 268 <has_n_lines n="6"/>
157 <has_size value="2900" delta="300"/> 269 <has_size value="2900" delta="300"/>
158 </assert_contents> 270 </assert_contents>
159 </output> 271 </output>
160 <output name="confidence_scores"> 272 <output name="output_confidence_scores">
161 <assert_contents> 273 <assert_contents>
162 <has_n_columns n="2"/> 274 <has_n_columns n="2"/>
163 <has_n_lines n="6"/> 275 <has_n_lines n="6"/>
164 <has_size value="70" delta="50"/> 276 <has_size value="70" delta="50"/>
165 </assert_contents> 277 </assert_contents>
203 </tests> 315 </tests>
204 <help><![CDATA[ 316 <help><![CDATA[
205 317
206 .. class:: infomark 318 .. class:: infomark
207 319
320 | AlphaFold v2: AI-guided 3D structural prediction of proteins
321 |
322 | **NOTE: this tool packages AlphaFold v2.3.1.**
323 |
324 | This means that the neural network has been trained on PDBs with a release
325 | date before 2021-09-30 (the training cutoff was 2018-04-30 until ``v2.3.0``).
326 |
327 | Find out more in the technical and release notes:
328 |
329
330 - `Release notes for v2.3.1 <https://github.com/deepmind/alphafold/releases/tag/v2.3.1>`_
331 - `Technical notes for v2.3 <https://github.com/deepmind/alphafold/blob/main/docs/technical_note_v2.3.0.md>`_
332
333 | If you want to use AlphaFold trained against an older cutoff date, switch to Galaxy version ``2.1.2`` (which was trained on data up to 2018-04-30).
334 |
335
208 **What it does** 336 **What it does**
209 337
210 | AlphaFold v2.1: AI-guided 3D structure prediction of proteins
211 |
212
213 *What is AlphaFold?* 338 *What is AlphaFold?*
214 339
215 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence (in Fasta format), then will 'fold' that sequence into a 3D model. 340 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence in Fasta format, which will be "folded" into a 3D model.
216 |
217 | **NOTE: AlphaFold has numerous versions - this tool uses AlphaFold v2.1.2.**
218 | 341 |
219 342
220 *What makes AlphaFold different?* 343 *What makes AlphaFold different?*
221 344
222 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally. 345 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally.
223 | In-silico protein folding has been an active field of research for decades, but existing tools ran more slowly and with less reliability than AlphaFold. 346 | In-silico protein folding has been an active field of research for decades, but existing tools were slower and far less reliable than AlphaFold.
224 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known. 347 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known.
225 | 348 |
349
350
351 **Input**
352
353 *Amino acid sequence*
354
355 | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format.
356 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box.
357 | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer.
358 |
359 |
360
361 **Outputs**
362
363 *Visualization*
364
365 An interactive 3D graphic of the best predicted molecular structures.
366 This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from.
367 Open the "Visualization" history output by clicking on the "view data" icon:
368
369 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true
370 :height: 520
371 :alt: Result visualization
372
373 |
374
375 *PDB files*
376
377 | Five PDB (Protein Data Bank) files will be created, ordered by rank, as predicted by AlphaFold.
378 | These files describe the molecular structures and can be used for downstream analysis, e.g. *in silico* molecular docking.
379 | **PLEASE NOTE** that all outputs have been renamed to their respective rank order, including model and model.pkl files.
380 |
381
382 *Model confidence scores (optional)*
383
384 | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_, or the ``iptm+ptm`` score if run in multimer mode) which may be useful for downstream analysis.
385 | Model confidence scores are also included as a column (replacing ``bFactor``) in the default PDB output.
386 |
387 |
388
389 *Model data files (ranked_n.pkl)*
390
391 | Per-model data stored in pickle files (a Python binary data format). These files can be used as inputs to downstream analysis software (such as Chimera X) for visualizing structures and computing kinetics between protein multimers and domains.
392 | The tool will produce one ``.pkl`` output for each of the PDB models.
393 |
394 |
395
396 *relax_metrics.json (optional)*
397
398 | A JSON-formatted text file containing relax metrics (mostly remaining violations).
399 |
400
401 **AlphaFold configuration**
402
403 | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_.
404 | This means that it runs with Amber relaxation enabled, with relaxed PDB models collected as output datasets. If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_.
405 |
406 |
407
408 **External Resources**
409
410 We highly recommend checking out the
411 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_,
412 which contains pre-computed structures for over 200 million known proteins.
413 See also:
414
415 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_
416 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_
226 417
227 *Downstream analysis* 418 *Downstream analysis*
228 419
229 | Obtaining a protein structure prediction is the first step in many analyses. 420 | Obtaining a protein structure prediction is the first step in many analyses.
230 | The 3D models created by AlphaFold can be used in downstream analysis, including the following: 421 | The 3D models created by AlphaFold can be used in downstream analysis, including the following:
235 - Molecular docking 426 - Molecular docking
236 3D structures can be used to predict the binding affinity of different compounds. 427 3D structures can be used to predict the binding affinity of different compounds.
237 This is especially useful in screening drug candidates. 428 This is especially useful in screening drug candidates.
238 - Protein-protein interactions 429 - Protein-protein interactions
239 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation. 430 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation.
240 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_. 431 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_, `SwissDock <http://www.swissdock.ch/>`_, `DockQ <https://github.com/bjornwallner/DockQ>`_, `MM-Align <https://zhanggroup.org/MM-align/>`_ and `TM-Align <https://zhanggroup.org/TM-align/>`_. Protein-protein interactions are often inferred from AlphaFold-Multimer predictions, which provide a level of confidence in binding affinity between homomer/heteromer subunits.
241
242 | Protein complex interactions are also commonly observed with AlphaFold's multimer prediction mode.
243 |
244 |
245
246 **Input**
247
248 *Amino acid sequence*
249
250 | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format.
251 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box.
252 | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer.
253 |
254 |
255
256 **Outputs**
257
258 *Visualization*
259
260 An interactive 3D graphic of the best predicted molecular structures.
261 This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from.
262 Open the "Visualization" history output by clicking on the "view data" icon:
263
264 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true
265 :height: 520
266 :alt: Result visualization
267
268 |
269
270 *PDB files*
271
272 | Five PDB (Protein Data Bank) files will be created for the best ranking models predicted by AlphaFold.
273 | These files describe the molecular structures and can be used for downstream analysis. e.g. *in silico* molecular docking.
274 |
275
276 *Model confidence scores (optional)*
277
278 | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_, or the ``iptm+ptm`` score if run in multimer mode) which may be useful for downstream analysis.
279 | Model confidence scores are also included as a column (replacing ``bFactor``) in the default PDB output.
280 |
281 |
282
283 **AlphaFold configuration**
284
285 | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_.
286 | This means that it runs against the full database with Amber relaxation, with ``max_template_date`` set to today's date. If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_.
287 |
288 |
289
290 **External Resources**
291
292 We HIGHLY recommend checking out the
293 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_,
294 which contains pre-computed structures for over 200 million known proteins.
295 See also:
296
297 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_
298 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_
299 432
300 ]]></help> 433 ]]></help>
301 <citations> 434 <citations>
302 <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation> 435 <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation>
303 <citation type="doi">https://doi.org/10.1101/2021.10.04.463034</citation> 436 <citation type="doi">https://doi.org/10.1101/2021.10.04.463034</citation>