comparison ramachandran_auto_protein.py @ 0:743bd6aa3c7a draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
author chemteam
date Mon, 24 Aug 2020 16:41:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:743bd6aa3c7a
1 #!/usr/bin/env python
2
3 import argparse
4 import base64
5 import importlib
6 import sys
7
8 import MDAnalysis as mda
9 from MDAnalysis.analysis.dihedrals import Ramachandran
10
11 import h5py
12
13 from jinja2 import Environment, FileSystemLoader
14
15 import matplotlib
16 import matplotlib.pyplot as plt
17 import matplotlib.ticker as ticker
18
19 import numpy as np
20 import numpy.linalg
21
22 import seaborn as sns
23
24
25 matplotlib.use('Agg') # noqa
26
27
28 def parse_command_line(argv):
29 parser = argparse.ArgumentParser()
30 parser.add_argument('--itraj', help='input traj')
31 parser.add_argument('--istr', help='input str')
32 parser.add_argument('--itrajext', help='input traj ext')
33 parser.add_argument('--istrext', help='input str ext')
34 parser.add_argument('--isegid1', help='segid 1')
35 parser.add_argument('--iresid1', help='resid start')
36 parser.add_argument('--iresid2', help='resid end')
37 parser.add_argument('--iresname', help='resname e.g. ALA')
38 parser.add_argument('--igroupby', help='groupby names or ids')
39 parser.add_argument('--itemplatepath', help='template path')
40 parser.add_argument('--o_plot1', help='MDA Ramachandran plot')
41 parser.add_argument('--o_plot2', help='Seaborn Ramachandran plot')
42 parser.add_argument('--o_data1', help='Timeseries in HDF5 format')
43 parser.add_argument('--o_html1', help='Html overview output of all plots')
44 return parser.parse_args()
45
46
47 args = parse_command_line(sys.argv)
48
49 currentpath = "."
50 if args.itemplatepath is not None:
51 currentpath = args.itemplatepath
52
53
54 u = mda.Universe(args.istr, args.itraj,
55 topology_format=args.istrext, format=args.itrajext)
56 selection = "(segid %s)" % \
57 (args.isegid1)
58
59 if args.iresname is not None:
60 selection = "(segid %s and resname %s)" % \
61 (args.isegid1, args.iresname)
62
63 if args.iresid1 is not None and args.iresid2 is not None:
64 assert(int(args.iresid1) > 0), "ResID numbering starts at 1 for this tool."
65 assert(int(args.iresid2) > 0), "ResID numbering starts at 1 for this tool."
66 assert(int(args.iresid2) > int(args.iresid1)
67 ), "ResID2 must be at least ResID1+1"
68 selection = "(segid %s and resid %s-%s)" % \
69 (args.isegid1, int(args.iresid1), int(args.iresid2))
70 if args.iresname is not None:
71 selection = "(segid %s and resid %s-%s and resname %s)" % \
72 (args.isegid1, int(args.iresid1), int(args.iresid2), args.iresname)
73
74 r = u.select_atoms(selection)
75
76 assert(r != u.select_atoms('name thiscannotpossiblyexist')
77 ), \
78 """The selection you specified returns an empty result.
79 Check segment names and residue ID's. Also check the
80 structure and trajectory file selected are the correct ones"""
81
82 if args.igroupby is not None:
83 group_selections = {} # dictionary of selections
84 if args.igroupby == 'name':
85 groupby = sorted(list(set(r.resnames)))
86 for e in groupby:
87 s = r & u.select_atoms("resname %s" % e)
88 this_sel = "%s and resname %s" % (selection, e)
89 group_selections[this_sel] = s
90 elif args.igroupby == 'id':
91 groupby = sorted(list(set(r.resids)))
92 for e in groupby:
93 s = r & u.select_atoms("resid %s" % e)
94 this_sel = "%s and resid %s" % (selection, e)
95 group_selections[this_sel] = s
96 else:
97 assert False, ("Invalid argument for igroupby. "
98 "Only name and id are valid options.")
99
100
101 def ramachandran_plot(atomgroup, selection, outputfile1, outputfile2,
102 image_format='png'):
103 # plot standard mdanalysis and seaborn 2D with kde
104 R = Ramachandran(atomgroup).run()
105 fig, ax = plt.subplots(figsize=plt.figaspect(1))
106 R.plot(ax=ax, color='k', marker='.', ref=True)
107
108 a = R.angles.reshape(np.prod(R.angles.shape[:2]), 2)
109 # open hdf file
110 with h5py.File(args.o_data1, 'a') as f:
111 setname = "%s" % (selection)
112 f["/" + setname + "/ramachandran/phi"] = a[:, 0]
113 f["/" + setname + "/ramachandran/psi"] = a[:, 1]
114 plt.tight_layout()
115 # svg is better but sticking with png for now
116 plt.savefig(outputfile1, format=image_format)
117
118 sns.reset_defaults()
119 importlib.reload(plt)
120 importlib.reload(sns)
121 with sns.axes_style("white"):
122 h = sns.jointplot(x=a[:, 0], y=a[:, 1],
123 kind="kde", space=0)
124 h.set_axis_labels(r'$\phi$ (deg)', r'$\psi$ (deg)')
125 h.ax_joint.set_xlim(-180, 180)
126 h.ax_joint.set_ylim(-180, 180)
127 h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
128 h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
129 plt.savefig(outputfile2, format=image_format, bbox_inches='tight')
130
131
132 def get_base64_encoded_image(image_path):
133 """ encode image to string for use in html later"""
134 with open(image_path, "rb") as img_file:
135 return base64.b64encode(img_file.read()).decode('utf-8')
136
137
138 plots = []
139 if args.igroupby is not None:
140 for k, v in group_selections.items():
141 print(k, v)
142 try:
143 ramachandran_plot(v, str(k), "ramachandran1" +
144 str(k), "ramachandran2" + str(k))
145 plots.append({'Name': "%s" % (k), 'plot1':
146 get_base64_encoded_image("ramachandran1" + str(k)),
147 'plot2': get_base64_encoded_image("ramachandran2"
148 + str(k))})
149 except Exception as einstance:
150 print(type(einstance))
151 print(einstance.args)
152 print(einstance)
153
154 ramachandran_plot(r, selection, args.o_plot1, args.o_plot2)
155 plots.insert(0, {'Name': selection, 'plot1': get_base64_encoded_image(
156 args.o_plot1), 'plot2': get_base64_encoded_image(args.o_plot2)})
157
158 template_environment = Environment(loader=FileSystemLoader(
159 currentpath), lstrip_blocks=True, trim_blocks=True)
160 template = template_environment.get_template(
161 'ramachandran_auto_protein_html.j2')
162 with open(args.o_html1, 'w+') as f:
163 f.write(template.render(title="Ramachandran Plots", plots=plots))