Repository 'alphafold2'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxy-australia/alphafold2

Changeset 15:a58f7eb0df2c (2023-03-10)
Previous changeset 14:d00e15139065 (2023-02-28) Next changeset 16:f9eb041c518c (2023-04-03)
Commit message:
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit fd45a857a71358e7e5375dcfb5043cdc8560c5a5
modified:
alphafold.xml
outputs.py
validate_fasta.py
added:
macro_output.xml
macro_test_output.xml
b
diff -r d00e15139065 -r a58f7eb0df2c alphafold.xml
--- a/alphafold.xml Tue Feb 28 01:15:42 2023 +0000
+++ b/alphafold.xml Fri Mar 10 02:48:07 2023 +0000
[
b'@@ -2,7 +2,9 @@\n     <description> - AI-guided 3D structural prediction of proteins</description>\n     <macros>\n       <token name="@TOOL_VERSION@">2.3.1</token>\n-      <token name="@VERSION_SUFFIX@">0</token>\n+      <token name="@VERSION_SUFFIX@">1</token>\n+      <import>macro_output.xml</import>\n+      <import>macro_test_output.xml</import>\n     </macros>\n     <edam_topics>\n       <edam_topic>topic_0082</edam_topic>\n@@ -14,17 +16,20 @@\n       <xref type="bio.tools">alphafold_2</xref>\n     </xrefs>\n     <requirements>\n-        <container type="docker">neoformit/alphafold:v2.3.1_1</container>\n+        <container type="docker">neoformit/alphafold:v2.3.1_2</container>\n     </requirements>\n     <command detect_errors="exit_code"><![CDATA[\n \n+## Developers: to test with mock alphafold run, set `export PLANEMO_TESTING=1`\n+## in planemo\'s gx_venv_n/bin/activate script. AlphaFold outputs will be copied\n+## from the test-data directory instead of running the tool.\n+\n ## $ALPHAFOLD_DB variable should point to the location of the AlphaFold\n ## databases - defaults to /data\n \n-## Read FASTA input ----------------------------\n+## Read FASTA input -----------------------------------------------------------\n #if $fasta_or_text.input_mode == \'history\':\n     cp \'$fasta_or_text.fasta_file\' input.fasta\n-\n #elif $fasta_or_text.input_mode == \'textbox\':\n     echo \'$fasta_or_text.fasta_text\' > input.fasta\n #end if\n@@ -32,55 +37,66 @@\n && python3 \'$__tool_directory__/validate_fasta.py\' input.fasta\n --min_length \\${ALPHAFOLD_AA_LENGTH_MIN:-0}\n --max_length \\${ALPHAFOLD_AA_LENGTH_MAX:-0}\n-#if $multimer:\n+#if $model_preset == \'multimer\':\n --multimer\n #end if\n > alphafold.fasta\n \n-## Env vars -------------------------------\n+## Env vars -------------------------------------------------------------------\n && export TF_FORCE_UNIFIED_MEMORY=1\n && export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0\n && export TODAY=`date +"%Y-%m-%d"`\n 
\n-## Run alphafold  -------------------------\n-&& python /app/alphafold/run_alphafold.py\n-    --fasta_paths alphafold.fasta\n-    --output_dir output\n-    --data_dir \\${ALPHAFOLD_DB:-/data}\n-\n-    ## Set reference database paths\n-    --uniref90_database_path   \\${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta\n-    --mgnify_database_path     \\${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2022_05.fa\n-    --template_mmcif_dir       \\${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files\n-    --obsolete_pdbs_path       \\${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat\n-    #if $dbs == \'full\':\n-    --bfd_database_path        \\${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt\n-    --uniref30_database_path   \\${ALPHAFOLD_DB:-/data}/uniref30/UniRef30_2021_03\n-    #else\n-    --db_preset=reduced_dbs\n-    --small_bfd_database_path  \\${ALPHAFOLD_DB:-/data}/small_bfd/bfd-first_non_consensus_sequences.fasta\n-    #end if\n+## Run AlphaFold  -------------------------------------------------------------\n+#if os.environ.get(\'PLANEMO_TESTING\'):\n+    ## Run in testing mode (mocks a successful AlphaFold run by copying outputs)\n+    && echo "Creating dummy outputs for model_preset=$model_preset..."\n+    && bash \'$__tool_directory__/mock_alphafold.sh\' $model_preset\n+#else:\n+    ## Run AlphaFold\n+    && python /app/alphafold/run_alphafold.py\n+        --fasta_paths alphafold.fasta\n+        --output_dir output\n+        --data_dir \\${ALPHAFOLD_DB:-/data}\n+        --model_preset=$model_preset\n \n-    #if $max_template_date:\n-    --max_template_date=$max_template_date\n-    #else\n-    --max_template_date=\\$TODAY\n-    #end if\n+        ## Set reference database paths\n+        --uniref90_database_path   \\${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta\n+        --mgnify_database_path     \\${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2022_05.fa\n+        --template_mmcif_dir       \\${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files\n+     
   --obsolete_pdbs_path       \\${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat\n+        #if $dbs == \'full\':\n+        --bfd_database_p'..b'true"/>\n+            <param name="outputs|relax_json" value="true"/>\n+            <expand macro="test_output_plots_2" />\n+            <expand macro="test_output_confidence_scores" />\n+            <expand macro="test_output_plddts" />\n+            <expand macro="test_output_pdb_models" />\n+            <expand macro="test_output_pickles" />\n+            <expand macro="test_output_relax_json" />\n+            <expand macro="test_output_pae_csv" />\n+        </test>\n+\n+        <!-- Test multimer with all outputs -->\n+        <test expect_num_outputs="24">\n+            <conditional name="fasta_or_text">\n+                <param name="input_mode" value="history"/>\n+                <param name="fasta_file" value="multimer.fasta"/>\n+            </conditional>\n+            <param name="model_preset" value="multimer"/>\n+            <param name="outputs|plots" value="true"/>\n+            <param name="outputs|confidence_scores" value="true"/>\n+            <param name="outputs|plddts" value="true"/>\n+            <param name="outputs|pae_csv" value="true"/>\n+            <param name="outputs|model_pkls" value="true"/>\n+            <param name="outputs|relax_json" value="true"/>\n+            <expand macro="test_output_plots_3" />\n+            <expand macro="test_output_confidence_scores" />\n+            <expand macro="test_output_plddts" />\n+            <expand macro="test_output_pdb_models" />\n+            <expand macro="test_output_pickles" />\n+            <expand macro="test_output_relax_json" />\n+            <expand macro="test_output_pae_csv" />\n         </test>\n     </tests>\n     <help><![CDATA[\n@@ -389,19 +385,36 @@\n     *Model data files (ranked_n.pkl)*\n \n     | Per-model data stored in pickle files (a Python binary data format). 
These files can be used as inputs to downstream analysis software (such as Chimera X) for visualizing structures and computing kinetics between protein multimers and domains.\n-    | The tool will produce one ``.pkl`` output for each of the PDB models.\n+    | The tool will produce one ``.pkl`` output for each PDB model.\n+    |\n+    |\n+\n+    *pLDDT + PAE plots (optional)*\n+\n+    | A two-panel figure in PNG format showing:\n+    | a) pLDDT score plotted against residue position\n+    | b) a heatmap of predicted-alignment error (PAE) with residue position running along vertical and horizontal axes and color at each pixel indicating PAE value for the corresponding pair of residues.\n+    | Panel b) is only produced for ``monomer_ptm`` and ``multimer`` model presets.\n+    |\n+    |\n+\n+    *Model predicted-alignment error matrix (pae_ranked_n.csv)*\n+\n+    | Per-model predicted-alignment error (PAE) matrix - only available with the ``monomer_ptm`` and ``multimer`` model presets.\n+    | The tool will produce one ``.csv`` output for each PDB model.\n     |\n     |\n \n     *relax_metrics.json (optional)*\n \n-    | A JSON-formatted text file containing relax metrics (mostly remaining violations).\n+    | A JSON-formatted text file containing relax metrics (primarily remaining violations).\n+    |\n     |\n \n     **AlphaFold configuration**\n \n     | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold\'s GitHub <https://github.com/deepmind/alphafold>`_.\n-    | This means that it runs with Amber relaxation enabled, with relaxed PDB models collected as output datasets. 
If there are additonal parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_.\n+    | This means that it runs with Amber relaxation enabled, with relaxed PDB models collected as output datasets (ranked\\_*.pdb files). If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_.\n     |\n     |\n \n'
b
diff -r d00e15139065 -r a58f7eb0df2c macro_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macro_output.xml Fri Mar 10 02:48:07 2023 +0000
[
@@ -0,0 +1,176 @@
+<macros>
+    <xml name="output_pdb_models"> <!-- PDB outputs model1 to model5, collected from ranked_0.pdb to ranked_4.pdb; no filter element, so never excluded -->
+        <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: PDB ranked 4"/>
+        <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: PDB ranked 3"/>
+        <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: PDB ranked 2"/>
+        <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: PDB ranked 1"/>
+        <data name="model1" format="pdb" from_work_dir="output/alphafold/ranked_0.pdb" label="${tool.name} on ${on_string}: PDB ranked 0"/>
+    </xml>
+
+    <xml name="output_pae_csv"> <!-- One PAE CSV per ranked model (0 to 4); kept only when outputs['pae_csv'] is set and model_preset is not "monomer" -->
+        <data
+            name="pae_ranked_4"
+            format="csv"
+            from_work_dir="output/alphafold/extra/pae_ranked_4.csv"
+            label="${tool.name} on ${on_string}: pae_ranked_4.csv"
+        >
+            <filter>outputs['pae_csv']</filter>
+            <filter>model_preset != "monomer"</filter>
+        </data>
+        <data
+            name="pae_ranked_3"
+            format="csv"
+            from_work_dir="output/alphafold/extra/pae_ranked_3.csv"
+            label="${tool.name} on ${on_string}: pae_ranked_3.csv"
+        >
+            <filter>outputs['pae_csv']</filter>
+            <filter>model_preset != "monomer"</filter>
+        </data>
+        <data
+            name="pae_ranked_2"
+            format="csv"
+            from_work_dir="output/alphafold/extra/pae_ranked_2.csv"
+            label="${tool.name} on ${on_string}: pae_ranked_2.csv"
+        >
+            <filter>outputs['pae_csv']</filter>
+            <filter>model_preset != "monomer"</filter>
+        </data>
+        <data
+            name="pae_ranked_1"
+            format="csv"
+            from_work_dir="output/alphafold/extra/pae_ranked_1.csv"
+            label="${tool.name} on ${on_string}: pae_ranked_1.csv"
+        >
+            <filter>outputs['pae_csv']</filter>
+            <filter>model_preset != "monomer"</filter>
+        </data>
+        <data
+            name="pae_ranked_0"
+            format="csv"
+            from_work_dir="output/alphafold/extra/pae_ranked_0.csv"
+            label="${tool.name} on ${on_string}: pae_ranked_0.csv"
+        >
+            <filter>outputs['pae_csv']</filter>
+            <filter>model_preset != "monomer"</filter>
+        </data>
+    </xml>
+
+    <xml name="output_pickles"> <!-- One ranked_N.pkl per model (0 to 4), declared as format "binary"; kept only when outputs['model_pkls'] is set -->
+        <data
+            name="output_ranked_4_pkl"
+            format="binary"
+            from_work_dir="output/alphafold/extra/ranked_4.pkl"
+            label="${tool.name} on ${on_string}: ranked_4.pkl"
+        >
+            <filter>outputs['model_pkls']</filter>
+        </data>
+        <data
+            name="output_ranked_3_pkl"
+            format="binary"
+            from_work_dir="output/alphafold/extra/ranked_3.pkl"
+            label="${tool.name} on ${on_string}: ranked_3.pkl"
+        >
+            <filter>outputs['model_pkls']</filter>
+        </data>
+        <data
+            name="output_ranked_2_pkl"
+            format="binary"
+            from_work_dir="output/alphafold/extra/ranked_2.pkl"
+            label="${tool.name} on ${on_string}: ranked_2.pkl"
+        >
+            <filter>outputs['model_pkls']</filter>
+        </data>
+        <data
+            name="output_ranked_1_pkl"
+            format="binary"
+            from_work_dir="output/alphafold/extra/ranked_1.pkl"
+            label="${tool.name} on ${on_string}: ranked_1.pkl"
+        >
+            <filter>outputs['model_pkls']</filter>
+        </data>
+        <data
+            name="output_ranked_0_pkl"
+            format="binary"
+            from_work_dir="output/alphafold/extra/ranked_0.pkl"
+            label="${tool.name} on ${on_string}: ranked_0.pkl"
+        >
+            <filter>outputs['model_pkls']</filter>
+        </data>
+    </xml>
+
+    <xml name="output_plots"> <!-- One ranked_N.png plot per model (0 to 4); kept only when outputs['plots'] is set -->
+        <data
+            name="plot_ranked_4"
+            format="png"
+            from_work_dir="output/alphafold/extra/ranked_4.png"
+            label="${tool.name} on ${on_string}: pLDDT/PAE plot ranked 4"
+        >
+            <filter>outputs['plots']</filter>
+        </data>
+        <data
+            name="plot_ranked_3"
+            format="png"
+            from_work_dir="output/alphafold/extra/ranked_3.png"
+            label="${tool.name} on ${on_string}: pLDDT/PAE plot ranked 3"
+        >
+            <filter>outputs['plots']</filter>
+        </data>
+        <data
+            name="plot_ranked_2"
+            format="png"
+            from_work_dir="output/alphafold/extra/ranked_2.png"
+            label="${tool.name} on ${on_string}: pLDDT/PAE plot ranked 2"
+        >
+            <filter>outputs['plots']</filter>
+        </data>
+        <data
+            name="plot_ranked_1"
+            format="png"
+            from_work_dir="output/alphafold/extra/ranked_1.png"
+            label="${tool.name} on ${on_string}: pLDDT/PAE plot ranked 1"
+        >
+            <filter>outputs['plots']</filter>
+        </data>
+        <data
+            name="plot_ranked_0"
+            format="png"
+            from_work_dir="output/alphafold/extra/ranked_0.png"
+            label="${tool.name} on ${on_string}: pLDDT/PAE plot ranked 0"
+        >
+            <filter>outputs['plots']</filter>
+        </data>
+    </xml>
+
+    <xml name="output_confidence_scores"> <!-- Tabular output from model_confidence_scores.tsv; kept only when outputs['confidence_scores'] is set -->
+        <data
+            name="output_confidence_scores"
+            format="tabular"
+            from_work_dir="output/alphafold/extra/model_confidence_scores.tsv"
+            label="${tool.name} on ${on_string}: Model confidence scores"
+        >
+            <filter>outputs['confidence_scores']</filter>
+        </data>
+    </xml>
+
+    <xml name="output_plddts"> <!-- Tabular output from plddts.tsv (per-residue confidence scores); kept only when outputs['plddts'] is set -->
+        <data
+            name="output_plddts"
+            format="tabular"
+            from_work_dir="output/alphafold/extra/plddts.tsv"
+            label="${tool.name} on ${on_string}: Per-residue confidence scores (plddts)"
+        >
+            <filter>outputs['plddts']</filter>
+        </data>
+    </xml>
+
+    <xml name="output_relax_json"> <!-- JSON output from relax_metrics_ranked.json; kept only when outputs['relax_json'] is set -->
+        <data
+            name="output_relax_json"
+            format="json"
+            from_work_dir="output/alphafold/extra/relax_metrics_ranked.json"
+            label="${tool.name} on ${on_string}: relax_metrics_ranked.json"
+        >
+            <filter>outputs['relax_json']</filter>
+        </data>
+    </xml>
+</macros>
b
diff -r d00e15139065 -r a58f7eb0df2c macro_test_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macro_test_output.xml Fri Mar 10 02:48:07 2023 +0000
b
@@ -0,0 +1,199 @@
+<macros>
+    <xml name="test_output_plddts"> <!-- Asserts output_plddts has 5 lines and a size of at least 2000 -->
+        <output name="output_plddts">
+            <assert_contents>
+                <has_n_lines n="5"/>
+                <has_size min="2000" />
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_confidence_scores"> <!-- Asserts output_confidence_scores has 2 columns, 5 lines and a size of 70 within a delta of 50 -->
+        <output name="output_confidence_scores">
+            <assert_contents>
+                <has_n_columns n="2"/>
+                <has_n_lines n="5"/>
+                <has_size value="70" delta="50"/>
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_relax_json"> <!-- Asserts output_relax_json has a size of at least 500 -->
+        <output name="output_relax_json">
+            <assert_contents>
+                <has_size min="500" />
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_pdb_models"> <!-- Asserts each PDB output (model1 to model5) has a size of at least 20000 -->
+        <output name="model1">
+            <assert_contents>
+                <has_size min="20000"/>
+            </assert_contents>
+        </output>
+        <output name="model2">
+            <assert_contents>
+                <has_size min="20000"/>
+            </assert_contents>
+        </output>
+        <output name="model3">
+            <assert_contents>
+                <has_size min="20000"/>
+            </assert_contents>
+        </output>
+        <output name="model4">
+            <assert_contents>
+                <has_size min="20000"/>
+            </assert_contents>
+        </output>
+        <output name="model5">
+            <assert_contents>
+                <has_size min="20000"/>
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_plots_1">
+        <!-- For one-panel plot without PAE heatmap: each plot_ranked_N must have a size between 10000 and 50000 -->
+        <output name="plot_ranked_0">
+            <assert_contents>
+                <has_size min="10000" max="50000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_1">
+            <assert_contents>
+                <has_size min="10000" max="50000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_2">
+            <assert_contents>
+                <has_size min="10000" max="50000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_3">
+            <assert_contents>
+                <has_size min="10000" max="50000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_4">
+            <assert_contents>
+                <has_size min="10000" max="50000" />
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_plots_2">
+        <!-- For two-panel plot with PAE heatmap: each plot_ranked_N must have a size between 50000 and 63000 -->
+        <output name="plot_ranked_0">
+            <assert_contents>
+                <has_size min="50000" max="63000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_1">
+            <assert_contents>
+                <has_size min="50000" max="63000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_2">
+            <assert_contents>
+                <has_size min="50000" max="63000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_3">
+            <assert_contents>
+                <has_size min="50000" max="63000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_4">
+            <assert_contents>
+                <has_size min="50000" max="63000" />
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_plots_3">
+        <!-- For two-panel plot with PAE heatmap, larger images: each plot_ranked_N must have a size between 220000 and 270000 (expanded by the multimer test) -->
+        <output name="plot_ranked_0">
+            <assert_contents>
+                <has_size min="220000" max="270000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_1">
+            <assert_contents>
+                <has_size min="220000" max="270000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_2">
+            <assert_contents>
+                <has_size min="220000" max="270000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_3">
+            <assert_contents>
+                <has_size min="220000" max="270000" />
+            </assert_contents>
+        </output>
+        <output name="plot_ranked_4">
+            <assert_contents>
+                <has_size min="220000" max="270000" />
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_pae_csv"> <!-- Asserts each PAE CSV output (pae_ranked_0 to pae_ranked_4) has a size of at least 5000 -->
+        <output name="pae_ranked_0">
+            <assert_contents>
+                <has_size min="5000"/>
+            </assert_contents>
+        </output>
+        <output name="pae_ranked_1">
+            <assert_contents>
+                <has_size min="5000"/>
+            </assert_contents>
+        </output>
+        <output name="pae_ranked_2">
+            <assert_contents>
+                <has_size min="5000"/>
+            </assert_contents>
+        </output>
+        <output name="pae_ranked_3">
+            <assert_contents>
+                <has_size min="5000"/>
+            </assert_contents>
+        </output>
+        <output name="pae_ranked_4">
+            <assert_contents>
+                <has_size min="5000"/>
+            </assert_contents>
+        </output>
+    </xml>
+
+    <xml name="test_output_pickles"> <!-- Asserts each pickle output (output_ranked_0_pkl to output_ranked_4_pkl) has a size of at least 1000000 -->
+        <output name="output_ranked_4_pkl">
+            <assert_contents>
+                <has_size min="1000000"/>
+            </assert_contents>
+        </output>
+        <output name="output_ranked_3_pkl">
+            <assert_contents>
+                <has_size min="1000000"/>
+            </assert_contents>
+        </output>
+        <output name="output_ranked_2_pkl">
+            <assert_contents>
+                <has_size min="1000000"/>
+            </assert_contents>
+        </output>
+        <output name="output_ranked_1_pkl">
+            <assert_contents>
+                <has_size min="1000000"/>
+            </assert_contents>
+        </output>
+        <output name="output_ranked_0_pkl">
+            <assert_contents>
+                <has_size min="1000000"/>
+            </assert_contents>
+        </output>
+    </xml>
+</macros>
b
diff -r d00e15139065 -r a58f7eb0df2c outputs.py
--- a/outputs.py Tue Feb 28 01:15:42 2023 +0000
+++ b/outputs.py Fri Mar 10 02:48:07 2023 +0000
[
@@ -19,13 +19,16 @@
 import os
 import pickle as pk
 import shutil
+from matplotlib import pyplot as plt
 from pathlib import Path
 from typing import List
 
-# Output file names
+# Output file paths
 OUTPUT_DIR = 'extra'
 OUTPUTS = {
     'model_pkl': OUTPUT_DIR + '/ranked_{rank}.pkl',
+    'model_pae': OUTPUT_DIR + '/pae_ranked_{rank}.csv',
+    'model_plot': OUTPUT_DIR + '/ranked_{rank}.png',
     'model_confidence_scores': OUTPUT_DIR + '/model_confidence_scores.tsv',
     'plddts': OUTPUT_DIR + '/plddts.tsv',
     'relax': OUTPUT_DIR + '/relax_metrics_ranked.json',
@@ -46,8 +49,9 @@
         self.output_confidence_scores = True
         self.output_residue_scores = False
         self.is_multimer = False
+        self.parse()
 
-    def parse_settings(self) -> None:
+    def parse(self) -> None:
         parser = argparse.ArgumentParser()
         parser.add_argument(
             "workdir",
@@ -67,15 +71,26 @@
             action="store_true"
         )
         parser.add_argument(
-            "--model-pkl",
-            dest="model_pkl",
+            "--pkl",
             help="rename model pkl outputs with rank order",
             action="store_true"
         )
+        parser.add_argument(
+            "--pae",
+            help="extract PAE from pkl files to CSV format",
+            action="store_true"
+        )
+        parser.add_argument(
+            "--plot",
+            help="Plot pLDDT and PAE for each model",
+            action="store_true"
+        )
         args = parser.parse_args()
         self.workdir = Path(args.workdir.rstrip('/'))
         self.output_residue_scores = args.plddts
-        self.output_model_pkls = args.model_pkl
+        self.output_model_pkls = args.pkl
+        self.output_model_plots = args.plot
+        self.output_pae = args.pae
         self.is_multimer = args.multimer
         self.output_dir = self.workdir / OUTPUT_DIR
         os.makedirs(self.output_dir, exist_ok=True)
@@ -212,6 +227,31 @@
         shutil.copyfile(path, new_path)
 
 
+def extract_pae_to_csv(ranking: ResultRanking, context: ExecutionContext):
+    """Extract predicted alignment error matrix from pickle files.
+
+    Creates a CSV file for each of five ranked models.
+    """
+    for path in context.model_pkl_paths:
+        model = ResultModelPrediction(path, context)
+        rank = ranking.get_rank_for_model(model.name)
+        with open(path, 'rb') as f:
+            data = pk.load(f)
+        if 'predicted_aligned_error' not in data:
+            print("Skipping PAE output"
+                  f" - not found in {path}."
+                  " Running with model_preset=monomer?")
+            return
+        pae = data['predicted_aligned_error']
+        out_path = (
+            context.settings.workdir
+            / OUTPUTS['model_pae'].format(rank=rank)
+        )
+        with open(out_path, 'w') as f:
+            for row in pae:
+                f.write(','.join([str(x) for x in row]) + '\n')
+
+
 def rekey_relax_metrics(ranking: ResultRanking, context: ExecutionContext):
     """Replace keys in relax_metrics.json with 0-indexed rank."""
     with open(context.relax_metrics) as f:
@@ -224,10 +264,44 @@
         json.dump(data, f)
 
 
+def plddt_pae_plots(ranking: ResultRanking, context: ExecutionContext):
+    """Generate a pLDDT + PAE plot for each model."""
+    for path in context.model_pkl_paths:
+        num_plots = 2
+        model = ResultModelPrediction(path, context)
+        rank = ranking.get_rank_for_model(model.name)
+        png_path = (
+            context.settings.workdir
+            / OUTPUTS['model_plot'].format(rank=rank)
+        )
+        plddts = model.data['plddt']
+        if 'predicted_aligned_error' in model.data:
+            pae = model.data['predicted_aligned_error']
+            max_pae = model.data['max_predicted_aligned_error']
+        else:
+            num_plots = 1
+
+        plt.figure(figsize=[8 * num_plots, 6])
+        plt.subplot(1, num_plots, 1)
+        plt.plot(plddts)
+        plt.title('Predicted LDDT')
+        plt.xlabel('Residue')
+        plt.ylabel('pLDDT')
+
+        if num_plots == 2:
+            plt.subplot(1, 2, 2)
+            plt.imshow(pae, vmin=0., vmax=max_pae, cmap='Greens_r')
+            plt.colorbar(fraction=0.046, pad=0.04)
+            plt.title('Predicted Aligned Error')
+            plt.xlabel('Scored residue')
+            plt.ylabel('Aligned residue')
+
+        plt.savefig(png_path)
+
+
 def main():
     """Parse output files and generate additional output files."""
     settings = Settings()
-    settings.parse_settings()
     context = ExecutionContext(settings)
     ranking = ResultRanking(context)
     write_confidence_scores(ranking, context)
@@ -236,7 +310,11 @@
     # Optional outputs
     if settings.output_model_pkls:
         rename_model_pkls(ranking, context)
-
+    if settings.output_model_plots:
+        plddt_pae_plots(ranking, context)
+    if settings.output_pae:
+        # Only created by monomer_ptm and multimer models
+        extract_pae_to_csv(ranking, context)
     if settings.output_residue_scores:
         write_per_residue_scores(ranking, context)
 
b
diff -r d00e15139065 -r a58f7eb0df2c validate_fasta.py
--- a/validate_fasta.py Tue Feb 28 01:15:42 2023 +0000
+++ b/validate_fasta.py Fri Mar 10 02:48:07 2023 +0000
b
@@ -205,6 +205,11 @@
         for fas in clean_fastas:
             fw.write(fas)
 
+        sys.stderr.write("Validated FASTA sequence(s):\n\n")
+        for fas in clean_fastas:
+            sys.stderr.write(fas.header + '\n')
+            sys.stderr.write(fas.aa_seq + '\n\n')
+
     except ValueError as exc:
         sys.stderr.write(f"{exc}\n\n")
         raise exc