diff alphafold.xml @ 9:3bd420ec162d draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 7726c3cba165bdc8fc6366ec0ce6596e55657468
author galaxy-australia
date Tue, 13 Sep 2022 22:04:12 +0000
parents ca90d17ff51b
children 072c324f20fc
line wrap: on
line diff
--- a/alphafold.xml	Fri Aug 19 00:29:16 2022 +0000
+++ b/alphafold.xml	Tue Sep 13 22:04:12 2022 +0000
@@ -2,7 +2,7 @@
     <description> - AI-guided 3D structural prediction of proteins</description>
     <macros>
       <token name="@TOOL_VERSION@">2.1.2</token>
-      <token name="@VERSION_SUFFIX@">1</token>
+      <token name="@VERSION_SUFFIX@">2</token>
     </macros>
     <edam_topics>
       <edam_topic>topic_0082</edam_topic>
@@ -32,6 +32,9 @@
 python3 '$__tool_directory__/validate_fasta.py' input.fasta
 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0}
 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0}
+#if $multimer:
+--multimer
+#end if
 > alphafold.fasta &&
 
 ## env vars -------------------------------
@@ -46,18 +49,32 @@
 --data_dir \${ALPHAFOLD_DB:-/data}
 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta
 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa
---pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70
---template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files
+--template_mmcif_dir   \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files
 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat
 --max_template_date=\$DATE
 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
 --uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08
+
 ## Param introduced in AlphaFold v2.1.2:
 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True}
+
+#if $multimer:
+--model_preset=multimer
+--pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/pdb_seqres/pdb_seqres.txt
+--uniprot_database_path=\${ALPHAFOLD_DB:-/data}/uniprot/uniprot.fasta
+##--num_multimer_predictions_per_model=1  ## introduced alphafold>=2.2.0
+
+#else
+--pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70
+#end if
 &&
 
 ## Generate additional outputs ------------
-python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts &&
+python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts
+#if $multimer:
+--multimer
+#end if
+&&
 
 ## HTML output
 mkdir -p '${ html.files_path }' &&
@@ -67,24 +84,34 @@
 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers.
 ## The working directory ends up two levels deep and the visualization html page
 ## fails to load the PDB files as static assets.
-[ -d working ] && cp -r working/* .
+(([ -d working ] && cp -r working/* .) || true)
 
     ]]></command>
     <inputs>
         <conditional name="fasta_or_text">
-            <param name="input_mode" type="select" label="Fasta Input" help="Single protein sequence to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. Provide only 1 sequence per job.">
+            <param name="input_mode" type="select" label="Fasta Input" help="Protein sequence(s) to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. If a FASTA file containing multiple sequences is provided, multimer mode must be selected.">
                 <option value="history">Use fasta from history</option>
                 <option value="textbox">Paste sequence into textbox</option>
             </param>
             <when value="history">
-                <param name="fasta_file" type="data" format="fasta" label="Fasta file from history" help="Select single fasta protein sequence from your history. If you wish to fold multiple proteins, submit an individual job for each protein." />
+                <param name="fasta_file" type="data" multiple="false" format="fasta" label="Fasta file from history" help="Select single FASTA protein file from your history. If you wish to fold multiple proteins, submit an individual job for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in this file." />
             </when>
             <when value="textbox">
-                <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein." />
+                <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." />
             </when>
         </conditional>
+
+        <param
+          name="multimer"
+          type="boolean"
+          checked="false"
+          label="Multimer mode"
+          help="Fold a protein multimer from multiple input sequences. You must input multiple sequences to run this mode."
+        />
+
         <param name="output_plddts" type="boolean" checked="false" label="Output per-residue confidence scores" truevalue="--plddts" falsevalue="" help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. Sections of low confidence often occur in disordered regions. " />
     </inputs>
+
     <outputs>
         <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: Model 5"/>
         <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: Model 4"/>
@@ -161,13 +188,14 @@
 
     **What it does**
 
-    | AlphaFold v2.0: AI-guided 3D structure prediction of proteins
+    | AlphaFold v2.1: AI-guided 3D structure prediction of proteins
     |
 
     *What is AlphaFold?*
 
     | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence (in Fasta format), then will 'fold' that sequence into a 3D model.
-    | NOTE: AlphaFold has a number of versions - this tool uses AlphaFold v2.0.
+    |
+    | **NOTE: AlphaFold has numerous versions - this tool uses AlphaFold v2.1.2.**
     |
 
     *What makes AlphaFold different?*
@@ -179,7 +207,7 @@
 
     *Downstream analysis*
 
-    | Obtaining a protein fold is the first step in many analyses.
+    | Obtaining a protein structure prediction is the first step in many analyses.
     | The 3D models created by AlphaFold can be used in downstream analysis, including the following:
     |
 
@@ -192,24 +220,27 @@
         Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation.
         To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_.
 
+    | Protein complex interactions are also commonly modelled with AlphaFold's multimer prediction mode.
+    |
+    |
+
     **Input**
 
     *Amino acid sequence*
 
-    | AlphaFold accepts a **single amino acid sequence** in FASTA format.
+    | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format.
     | You can choose to input either a file from your Galaxy history or paste a sequence into a text box.
-    | Please paste only a single sequence - we can only process a single sequence per job.
-    | Multiple sequences will return an error.
+    | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer.
+    |
     |
 
     **Outputs**
 
     *Visualization*
 
-    | An interactive 3D graphic of the best predicted molecular structures.
-    | This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from.
-    | Open the "Visualization" history output by clicking on the "view data" icon:
-    |
+    An interactive 3D graphic of the best predicted molecular structures.
+    This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from.
+    Open the "Visualization" history output by clicking on the "view data" icon:
 
     .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true
         :height: 520
@@ -225,15 +256,24 @@
 
     *Model confidence scores (optional)*
 
-    | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_) which may be useful for downstream analysis.
-    | Model confidence scores are also included as a column in the default PDB output.
+    | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_, or the ``iptm+ptm`` score if run in multimer mode) which may be useful for downstream analysis.
+    | Model confidence scores are also included as a column (replacing ``bFactor``) in the default PDB output.
+    |
+    |
+
+    **AlphaFold configuration**
+
+    | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_.
+    | This means that it runs against the full database with Amber relaxation, with ``max_template_date`` set to today's date. If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_.
+    |
     |
 
     **External Resources**
 
-    We recommend checking out the
+    We HIGHLY recommend checking out the
     `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_,
-    which contains predicted sequences for thousands of Human proteins. See also:
+    which contains pre-computed structures for over 200 million known proteins.
+    See also:
 
     - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_
     - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_
@@ -241,5 +281,6 @@
     ]]></help>
     <citations>
         <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation>
+        <citation type="doi">https://doi.org/10.1101/2021.10.04.463034</citation>
     </citations>
 </tool>