+ + - - - - - + - - - outputs['confidence_scores'] - - - - outputs['plddts'] - - - - outputs['model_pkls'] - - - outputs['model_pkls'] - - - outputs['model_pkls'] - - - outputs['model_pkls'] - - - outputs['model_pkls'] - - - outputs['relax_json'] - + + + + + + - + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_. - | This means that it runs with Amber relaxation enabled, with relaxed PDB models collected as output datasets. If there are additonal parameters that you would like to interact with, please `send a support request to Galaxy AU `_, or open an issue on `our GitHub `_. + | This means that it runs with Amber relaxation enabled, with relaxed PDB models collected as output datasets (ranked\_*.pdb files). If there are additonal parameters that you would like to interact with, please `send a support request to Galaxy AU `_, or open an issue on `our GitHub `_. | | diff -r d00e15139065 -r a58f7eb0df2c macro_output.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macro_output.xml Fri Mar 10 02:48:07 2023 +0000 @@ -0,0 +1,176 @@ + + + + + + + + + + + + outputs['pae_csv'] + model_preset != "monomer" + + + outputs['pae_csv'] + model_preset != "monomer" + + + outputs['pae_csv'] + model_preset != "monomer" + + + outputs['pae_csv'] + model_preset != "monomer" + + + outputs['pae_csv'] + model_preset != "monomer" + + + + + + outputs['model_pkls'] + + + outputs['model_pkls'] + + + outputs['model_pkls'] + + + outputs['model_pkls'] + + + outputs['model_pkls'] + + + + + + outputs['plots'] + + + outputs['plots'] + + + outputs['plots'] + + + outputs['plots'] + + + outputs['plots'] + + + + + + outputs['confidence_scores'] + + + + + + outputs['plddts'] + + + + + + outputs['relax_json'] + + + diff -r d00e15139065 -r a58f7eb0df2c macro_test_output.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macro_test_output.xml Fri Mar 10 02:48:07 2023 +0000 @@ -0,0 +1,199 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r d00e15139065 -r a58f7eb0df2c outputs.py --- a/outputs.py Tue Feb 28 01:15:42 2023 +0000 +++ b/outputs.py Fri Mar 10 02:48:07 2023 +0000 @@ -19,13 +19,16 @@ import os import pickle as pk import shutil +from matplotlib import pyplot as plt from pathlib import Path from typing import List -# Output file names +# Output file paths OUTPUT_DIR = 'extra' OUTPUTS = { 'model_pkl': OUTPUT_DIR + '/ranked_{rank}.pkl', + 'model_pae': OUTPUT_DIR + '/pae_ranked_{rank}.csv', + 'model_plot': OUTPUT_DIR + '/ranked_{rank}.png', 'model_confidence_scores': OUTPUT_DIR + '/model_confidence_scores.tsv', 'plddts': OUTPUT_DIR + '/plddts.tsv', 'relax': OUTPUT_DIR + '/relax_metrics_ranked.json', @@ -46,8 +49,9 @@ self.output_confidence_scores = True self.output_residue_scores = False self.is_multimer = False + self.parse() - def parse_settings(self) -> None: + def parse(self) -> None: parser = argparse.ArgumentParser() parser.add_argument( "workdir", @@ -67,15 +71,26 @@ action="store_true" ) parser.add_argument( - "--model-pkl", - dest="model_pkl", + "--pkl", help="rename model pkl outputs with rank order", action="store_true" ) + parser.add_argument( + "--pae", + help="extract PAE from pkl files to CSV format", + action="store_true" + ) + parser.add_argument( + "--plot", + help="Plot pLDDT and PAE for each model", + action="store_true" + ) args = parser.parse_args() self.workdir = Path(args.workdir.rstrip('/')) self.output_residue_scores = args.plddts - self.output_model_pkls = args.model_pkl + self.output_model_pkls = args.pkl + self.output_model_plots = args.plot + self.output_pae = args.pae self.is_multimer = args.multimer self.output_dir = self.workdir / OUTPUT_DIR os.makedirs(self.output_dir, exist_ok=True) @@ -212,6 +227,31 @@ shutil.copyfile(path, new_path) +def extract_pae_to_csv(ranking: ResultRanking, context: ExecutionContext): + """Extract predicted alignment error matrix from pickle files. + + Creates a CSV file for each of five ranked models. + """ + for path in context.model_pkl_paths: + model = ResultModelPrediction(path, context) + rank = ranking.get_rank_for_model(model.name) + with open(path, 'rb') as f: + data = pk.load(f) + if 'predicted_aligned_error' not in data: + print("Skipping PAE output" + f" - not found in {path}." + " Running with model_preset=monomer?") + return + pae = data['predicted_aligned_error'] + out_path = ( + context.settings.workdir + / OUTPUTS['model_pae'].format(rank=rank) + ) + with open(out_path, 'w') as f: + for row in pae: + f.write(','.join([str(x) for x in row]) + '\n') + + def rekey_relax_metrics(ranking: ResultRanking, context: ExecutionContext): """Replace keys in relax_metrics.json with 0-indexed rank.""" with open(context.relax_metrics) as f: @@ -224,10 +264,44 @@ json.dump(data, f) +def plddt_pae_plots(ranking: ResultRanking, context: ExecutionContext): + """Generate a pLDDT + PAE plot for each model.""" + for path in context.model_pkl_paths: + num_plots = 2 + model = ResultModelPrediction(path, context) + rank = ranking.get_rank_for_model(model.name) + png_path = ( + context.settings.workdir + / OUTPUTS['model_plot'].format(rank=rank) + ) + plddts = model.data['plddt'] + if 'predicted_aligned_error' in model.data: + pae = model.data['predicted_aligned_error'] + max_pae = model.data['max_predicted_aligned_error'] + else: + num_plots = 1 + + plt.figure(figsize=[8 * num_plots, 6]) + plt.subplot(1, num_plots, 1) + plt.plot(plddts) + plt.title('Predicted LDDT') + plt.xlabel('Residue') + plt.ylabel('pLDDT') + + if num_plots == 2: + plt.subplot(1, 2, 2) + plt.imshow(pae, vmin=0., vmax=max_pae, cmap='Greens_r') + plt.colorbar(fraction=0.046, pad=0.04) + plt.title('Predicted Aligned Error') + plt.xlabel('Scored residue') + plt.ylabel('Aligned residue') + + plt.savefig(png_path) + + def main(): """Parse output files and generate additional output files.""" settings = Settings() - settings.parse_settings() context = ExecutionContext(settings) ranking = ResultRanking(context) write_confidence_scores(ranking, context) @@ -236,7 +310,11 @@ # Optional outputs if settings.output_model_pkls: rename_model_pkls(ranking, context) - + if settings.output_model_plots: + plddt_pae_plots(ranking, context) + if settings.output_pae: + # Only created by monomer_ptm and multimer models + extract_pae_to_csv(ranking, context) if settings.output_residue_scores: write_per_residue_scores(ranking, context) diff -r d00e15139065 -r a58f7eb0df2c validate_fasta.py --- a/validate_fasta.py Tue Feb 28 01:15:42 2023 +0000 +++ b/validate_fasta.py Fri Mar 10 02:48:07 2023 +0000 @@ -205,6 +205,11 @@ for fas in clean_fastas: fw.write(fas) + sys.stderr.write("Validated FASTA sequence(s):\n\n") + for fas in clean_fastas: + sys.stderr.write(fas.header + '\n') + sys.stderr.write(fas.aa_seq + '\n\n') + except ValueError as exc: sys.stderr.write(f"{exc}\n\n") raise exc