changeset 4:a842da7ef42b draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
author chemteam
date Mon, 24 Aug 2020 16:37:11 -0400
parents 9a3a1f698fc6
children d60c274980f7
files end-to-end.py extract_rmsd.py macros.xml pca_cosine.xml ramachandran_auto_protein.py ramachandran_auto_protein_html.j2 ramachandran_plots.py test-data/test.yml
diffstat 8 files changed, 498 insertions(+), 63 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/end-to-end.py	Mon Aug 24 16:37:11 2020 -0400
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import argparse
+import itertools
+import sys
+
+import MDAnalysis as mda
+
+import matplotlib
+import matplotlib.pyplot as plt
+
+import numpy as np
+import numpy.linalg
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--ilabel', help='plot label')
+    parser.add_argument('--ititle1', help='plot title')
+    parser.add_argument('--output1', help='output1 - timeseries')
+    parser.add_argument('--o_plot', help='End to End plot')
+    return parser.parse_args()
+
+
+args = parse_command_line(sys.argv)
+
+
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+
+ntermatoms = "(segid %s and name N)" % \
+    (args.isegid1)
+ctermatoms = "(segid %s and name C)" % \
+    (args.isegid1)
+# not sure how robust this selection really is
+nterm = u.select_atoms(ntermatoms)[0]  # first atom named N
+cterm = u.select_atoms(ctermatoms)[-1]  # takes the last atom named 'C'
+
+enddist = []
+
+for ts in u.trajectory:  # iterate through all frames
+    r = cterm.position - nterm.position  # e-to-e vector from atom positions
+    d = numpy.linalg.norm(r)   # end-to-end distance
+    enddist.append((ts.frame, d))
+
+enddist = np.array(enddist)
+
+
+color = itertools.cycle(['r', 'b', 'gold'])
+
+fig, axs = plt.subplots(1, 2, sharex=False, sharey=False, tight_layout=True)
+
+params = {
+    'axes.labelsize': 8,
+    'legend.fontsize': 10,
+    'xtick.labelsize': 10,
+    'ytick.labelsize': 10,
+    'text.usetex': False,
+    'figure.figsize': [4.5, 4.5],
+    'figure.dpi': 300
+}
+plt.rcParams.update(params)
+
+axs[0].plot(enddist[:, 0], enddist[:, 1], 'r-', lw=2, label=args.ilabel)
+axs[0].set_xlabel("number of frames")
+axs[0].set_ylabel(r"End to end distance  ($\AA$)")
+axs[0].legend()
+
+n, bins, patches = axs[1].hist(enddist[:, 1], color=next(
+    color), label=args.ilabel, alpha=0.5, density=True, stacked=True)
+
+axs[1].legend()
+axs[1].set_ylabel('Density Normalised Frequency')
+axs[1].set_xlabel(r'End to end distance ($\AA$)')
+fig.suptitle(args.ititle1, fontsize=12, fontweight='bold')
+fig.subplots_adjust(top=0.45)
+
+print(
+    " \n".join(
+        [
+            'The End to End distance is measured between the following atoms:',
+            str(nterm),
+            str(cterm)]))
+
+# svg is better but sticking with png for now
+plt.savefig(args.o_plot, format='png')
+
+
+np.savetxt(args.output1, enddist, delimiter='\t')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_rmsd.py	Mon Aug 24 16:37:11 2020 -0400
@@ -0,0 +1,129 @@
+import argparse
+import json
+
+import MDAnalysis as m
+from MDAnalysis.analysis import align, rms
+from MDAnalysis.analysis.base import AnalysisFromFunction
+from MDAnalysis.coordinates.memory import MemoryReader
+
+import numpy as np
+
+
+def calc_rmsd(str_files, traj_files, ref_str, str_format, traj_format,
+              ref_str_format, filepath_out, group, start, end, step,
+              fitting_atoms):
+    """
+    the function will cycle through range 0 to no_t and load all files found.
+
+    str_files: text file with filepaths for structures, one on each line
+    traj_files: text file with filepaths for trajectories, one on each line
+    ref_str: reference structure for fitting
+    filepath_in: directory where the files are located
+    filepath_out: pickle file where results (3D matrix) should be saved to
+
+    group: atoms for which RMSD should be calculated;
+        use the MDAnalysis selection language
+    fitting_atoms: atoms used for str alignment prior to RMSD calculation;
+        use the MDAnalysis selection language
+
+    start: first trajectory frame to calculate RMSD
+    end: last trajectory frame to calculate RMSD
+    step: how frequently frames are sampled between start and end; obviously,
+        the larger the step, the quicker the script finishes
+    """
+
+    # open list of files
+    with open(str_files) as f1, open(traj_files) as f2:
+        str_file_list = f1.read().strip().split('\n')
+        traj_file_list = f2.read().strip().split('\n')
+
+        if sum(1 for line in f1) != sum(1 for line in f2):
+            raise IOError('Number of structure and trajectory files unequal.')
+
+    no_t = len(traj_file_list)
+
+    data = np.zeros((no_t, no_t,
+                    int((end - start)/step + ((end - start) % step > 0))))
+
+    # load files
+    universes = {}
+
+    for traj in range(no_t):
+        mobile = m.Universe(str_file_list[traj], traj_file_list[traj],
+                            format=traj_format, topology_format=str_format)
+        ref = m.Universe(ref_str, topology_format=ref_str_format)
+
+        mobile.trajectory[-1]  # set mobile trajectory to last frame
+        ref.trajectory[0]  # set reference trajectory to first frame
+
+        # perform alignment
+        align.AlignTraj(mobile, ref, select=fitting_atoms,
+                        in_memory=True).run()
+
+        grp = mobile.select_atoms(group)
+        universes[traj] = m.core.universe.Merge(grp)  # create Universe w grp
+        coordinates = AnalysisFromFunction(lambda ag: ag.positions.copy(),
+                                           grp).run().results  # write to uv
+        universes[traj].load_new(coordinates, format=MemoryReader)
+
+    print("All trajs loaded by MDAnalysis")
+
+    # calculate differences
+    for traj1 in range(no_t):
+        print("Calculating differences for traj {}".format(traj1))
+        for traj2 in range(traj1):
+
+            u1 = universes[traj1]
+            u2 = universes[traj2]
+
+            l1 = u1.select_atoms(group)
+            l2 = u2.select_atoms(group)
+
+            rmsd = rms.RMSD(l1, l2)
+
+            rmsd.run()
+
+            data[traj1, traj2] = rmsd.rmsd[:, 2]
+            data[traj2, traj1] = rmsd.rmsd[:, 2]
+
+    with open(filepath_out, 'w') as f:
+        json.dump(data.tolist(), f, indent=4, sort_keys=True)
+
+    print("Done!")
+    return
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--trajs', required=True,
+                        help='File containing trajectory filepaths.')
+    parser.add_argument("--strs",
+                        help='File containing structure filepaths.')
+    parser.add_argument("--ref-str",
+                        help='File containing reference structure.')
+    parser.add_argument('--traj-format', required=True,
+                        help='Trajectory format.')
+    parser.add_argument("--str-format", help='Structure format.')
+    parser.add_argument("--ref-str-format",
+                        help='Reference structure format.')
+    parser.add_argument('-o', '--outfile',
+                        help="Path to the output JSON file")
+    parser.add_argument('--group', help="Atoms for which RMSD should be"
+                        "calculated in MDAnalysis selection language")
+    parser.add_argument('--fitting', help="Fitting atoms for alignment"
+                        "prior to RMSD calculation")
+    parser.add_argument('--start', type=int,
+                        help="First trajectory frame to calculate RMSD")
+    parser.add_argument('--end', type=int,
+                        help="Last trajectory frame to calculate RMSD")
+    parser.add_argument('--step', type=int,
+                        help="Frame sampling frequency for RMSD calculation")
+    args = parser.parse_args()
+
+    calc_rmsd(args.strs, args.trajs, args.ref_str, args.str_format,
+              args.traj_format, args.ref_str_format, args.outfile,
+              args.group, args.start, args.end, args.step, args.fitting)
+
+
+if __name__ == "__main__":
+    main()
--- a/macros.xml	Wed May 20 13:03:58 2020 -0400
+++ b/macros.xml	Mon Aug 24 16:37:11 2020 -0400
@@ -1,36 +1,36 @@
 <macros>
-    <token name="@VERSION@">0.20</token>
+    <token name="@TOOL_VERSION@">1.0.0</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="0.20.1">mdanalysis</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">mdanalysis</requirement>
             <yield/>
         </requirements>
     </xml>
     <xml name="analysis_inputs">
-        <param format="dcd,xtc" name="trajin" type="data" label="DCD/XTC trajectory input"/>
-        <param format="pdb,gro" name="strin" type="data" label="PDB/GRO input"/>
+        <param format="dcd,xtc" name="trajin" type="data" label="DCD/XTC trajectory input" />
+        <param format="pdb,gro" name="strin" type="data" label="PDB/GRO input" />
         <yield/>
     </xml>
     <xml name="sanitizer">
         <sanitizer invalid_char="">
-            <valid initial="string.ascii_letters,string.digits"/>
-         </sanitizer>
-         <yield/>
+            <valid initial="string.ascii_letters,string.digits" />
+        </sanitizer>
+        <yield/>
     </xml>
     <xml name="sanitizer_resids">
         <sanitizer invalid_char="">
-            <valid initial="string.digits"/>
+            <valid initial="string.digits" />
         </sanitizer>
         <yield/>
     </xml>
     <xml name="tests_inputs">
-        <param name="trajin" value="test.dcd" ftype="dcd"/>
-        <param name="strin" value="test.pdb" ftype="pdb"/>
+        <param name="trajin" value="test.dcd" ftype="dcd" />
+        <param name="strin" value="test.pdb" ftype="pdb" />
         <yield/>
     </xml>
     <xml name="tests_inputs_gmx">
-        <param name="trajin" value="test.xtc" ftype="xtc"/>
-        <param name="strin" value="test.gro" ftype="gro"/>
+        <param name="trajin" value="test.xtc" ftype="xtc" />
+        <param name="strin" value="test.gro" ftype="gro" />
         <yield/>
     </xml>
     <xml name="citations">
--- a/pca_cosine.xml	Wed May 20 13:03:58 2020 -0400
+++ b/pca_cosine.xml	Mon Aug 24 16:37:11 2020 -0400
@@ -1,7 +1,8 @@
-<tool id="mdanalysis_cosine_analysis" name="Cosine Content" version="@VERSION@">
+<tool id="mdanalysis_cosine_analysis" name="Cosine Content" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
     <description>- measure the cosine content of the PCA projection</description>
     <macros>
         <import>macros.xml</import>
+        <token name="@GALAXY_VERSION@">0</token>
     </macros>
     <expand macro="requirements" />
     <command detect_errors="exit_code">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein.py	Mon Aug 24 16:37:11 2020 -0400
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+import argparse
+import base64
+import importlib
+import sys
+
+import MDAnalysis as mda
+from MDAnalysis.analysis.dihedrals import Ramachandran
+
+import h5py
+
+from jinja2 import Environment, FileSystemLoader
+
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+
+import numpy as np
+import numpy.linalg
+
+import seaborn as sns
+
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--iresid1', help='resid start')
+    parser.add_argument('--iresid2', help='resid end')
+    parser.add_argument('--iresname', help='resname e.g. ALA')
+    parser.add_argument('--igroupby', help='groupby names or ids')
+    parser.add_argument('--itemplatepath', help='template path')
+    parser.add_argument('--o_plot1', help='MDA Ramachandran plot')
+    parser.add_argument('--o_plot2', help='Seaborn Ramachandran plot')
+    parser.add_argument('--o_data1', help='Timeseries in HDF5 format')
+    parser.add_argument('--o_html1', help='Html overview output of all plots')
+    return parser.parse_args()
+
+
+args = parse_command_line(sys.argv)
+
+currentpath = "."
+if args.itemplatepath is not None:
+    currentpath = args.itemplatepath
+
+
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+selection = "(segid %s)" % \
+    (args.isegid1)
+
+if args.iresname is not None:
+    selection = "(segid %s and resname %s)" % \
+        (args.isegid1, args.iresname)
+
+if args.iresid1 is not None and args.iresid2 is not None:
+    assert(int(args.iresid1) > 0), "ResID numbering starts at 1 for this tool."
+    assert(int(args.iresid2) > 0), "ResID numbering starts at 1 for this tool."
+    assert(int(args.iresid2) > int(args.iresid1)
+           ), "ResID2 must be at least ResID1+1"
+    selection = "(segid %s and resid %s-%s)" % \
+        (args.isegid1, int(args.iresid1), int(args.iresid2))
+    if args.iresname is not None:
+        selection = "(segid %s and resid %s-%s and resname %s)" % \
+            (args.isegid1, int(args.iresid1), int(args.iresid2), args.iresname)
+
+r = u.select_atoms(selection)
+
+assert(r != u.select_atoms('name thiscannotpossiblyexist')
+       ), \
+               """The selection you specified returns an empty result.
+               Check segment names and residue ID's. Also check the
+                structure and trajectory file selected are the correct ones"""
+
+if args.igroupby is not None:
+    group_selections = {}  # dictionary of selections
+    if args.igroupby == 'name':
+        groupby = sorted(list(set(r.resnames)))
+        for e in groupby:
+            s = r & u.select_atoms("resname %s" % e)
+            this_sel = "%s and resname %s" % (selection, e)
+            group_selections[this_sel] = s
+    elif args.igroupby == 'id':
+        groupby = sorted(list(set(r.resids)))
+        for e in groupby:
+            s = r & u.select_atoms("resid %s" % e)
+            this_sel = "%s and resid %s" % (selection, e)
+            group_selections[this_sel] = s
+    else:
+        assert False, ("Invalid argument for igroupby. "
+                       "Only name and id are valid options.")
+
+
+def ramachandran_plot(atomgroup, selection, outputfile1, outputfile2,
+                      image_format='png'):
+    # plot standard mdanalysis and seaborn 2D with kde
+    R = Ramachandran(atomgroup).run()
+    fig, ax = plt.subplots(figsize=plt.figaspect(1))
+    R.plot(ax=ax, color='k', marker='.', ref=True)
+
+    a = R.angles.reshape(np.prod(R.angles.shape[:2]), 2)
+    # open hdf file
+    with h5py.File(args.o_data1, 'a') as f:
+        setname = "%s" % (selection)
+        f["/" + setname + "/ramachandran/phi"] = a[:, 0]
+        f["/" + setname + "/ramachandran/psi"] = a[:, 1]
+    plt.tight_layout()
+    # svg is better but sticking with png for now
+    plt.savefig(outputfile1, format=image_format)
+
+    sns.reset_defaults()
+    importlib.reload(plt)
+    importlib.reload(sns)
+    with sns.axes_style("white"):
+        h = sns.jointplot(x=a[:, 0], y=a[:, 1],
+                          kind="kde", space=0)
+        h.set_axis_labels(r'$\phi$ (deg)', r'$\psi$ (deg)')
+        h.ax_joint.set_xlim(-180, 180)
+        h.ax_joint.set_ylim(-180, 180)
+        h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+        h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+        plt.savefig(outputfile2, format=image_format, bbox_inches='tight')
+
+
+def get_base64_encoded_image(image_path):
+    """  encode image to string for use in html later"""
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode('utf-8')
+
+
+plots = []
+if args.igroupby is not None:
+    for k, v in group_selections.items():
+        print(k, v)
+        try:
+            ramachandran_plot(v, str(k), "ramachandran1" +
+                              str(k), "ramachandran2" + str(k))
+            plots.append({'Name': "%s" % (k), 'plot1':
+                         get_base64_encoded_image("ramachandran1" + str(k)),
+                         'plot2': get_base64_encoded_image("ramachandran2"
+                          + str(k))})
+        except Exception as einstance:
+            print(type(einstance))
+            print(einstance.args)
+            print(einstance)
+
+ramachandran_plot(r, selection, args.o_plot1, args.o_plot2)
+plots.insert(0, {'Name': selection, 'plot1': get_base64_encoded_image(
+    args.o_plot1), 'plot2': get_base64_encoded_image(args.o_plot2)})
+
+template_environment = Environment(loader=FileSystemLoader(
+    currentpath), lstrip_blocks=True, trim_blocks=True)
+template = template_environment.get_template(
+    'ramachandran_auto_protein_html.j2')
+with open(args.o_html1, 'w+') as f:
+    f.write(template.render(title="Ramachandran Plots", plots=plots))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein_html.j2	Mon Aug 24 16:37:11 2020 -0400
@@ -0,0 +1,25 @@
+<html>
+
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+  <title>{{ title }}</title>
+</head>
+
+<body>
+  <table>
+    <tr>
+      <th>Selection</th>
+      <th>Ramachandran scatter plot</th>
+      <th>Ramachandran histogram </th>
+    </tr>
+    {% for plot in plots %}
+    <tr>
+      <td>{{ plot['Name'] }}</td>
+      <td style="vertical-align:center"><img src="data:image/png;base64,{{plot['plot1']}}" /> </td>
+      <td style="vertical-align:center"><img src="data:image/png;base64,{{plot['plot2']}}" /> </td>
+    </tr>
+    {% endfor %}
+  </table>
+</body>
+
+</html>
--- a/ramachandran_plots.py	Wed May 20 13:03:58 2020 -0400
+++ b/ramachandran_plots.py	Mon Aug 24 16:37:11 2020 -0400
@@ -10,11 +10,16 @@
 
 import matplotlib
 import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+
 
 import numpy as np
 
 import seaborn as sns
 
+
+import yaml
+
 matplotlib.use('Agg')  # noqa
 
 
@@ -24,30 +29,7 @@
     parser.add_argument('--istr', help='input str')
     parser.add_argument('--itrajext', help='input traj ext')
     parser.add_argument('--istrext', help='input str ext')
-    parser.add_argument('--isegid1', help='segid 1')
-    parser.add_argument('--iresid1', help='resid 1')
-    parser.add_argument('--iname1', help='name 1')
-    parser.add_argument('--isegid2', help='segid 2')
-    parser.add_argument('--iresid2', help='resid 2')
-    parser.add_argument('--iname2', help='name 2')
-    parser.add_argument('--isegid3', help='segid 3')
-    parser.add_argument('--iresid3', help='resid 3')
-    parser.add_argument('--iname3', help='name 3')
-    parser.add_argument('--isegid4', help='segid 4')
-    parser.add_argument('--iresid4', help='resid 4')
-    parser.add_argument('--iname4', help='name 4')
-    parser.add_argument('--isegid5', help='segid 1')
-    parser.add_argument('--iresid5', help='resid 1')
-    parser.add_argument('--iname5', help='name 1')
-    parser.add_argument('--isegid6', help='segid 2')
-    parser.add_argument('--iresid6', help='resid 2')
-    parser.add_argument('--iname6', help='name 2')
-    parser.add_argument('--isegid7', help='segid 3')
-    parser.add_argument('--iresid7', help='resid 3')
-    parser.add_argument('--iname7', help='name 3')
-    parser.add_argument('--isegid8', help='segid 4')
-    parser.add_argument('--iresid8', help='resid 4')
-    parser.add_argument('--iname8', help='name 4')
+    parser.add_argument('--iyml', help='input in yml format')
     parser.add_argument('--output', help='output')
     parser.add_argument('--oramachandran_plot', help='dihedral plot')
     return parser.parse_args()
@@ -55,35 +37,35 @@
 
 args = parse_command_line(sys.argv)
 
+with open(args.iyml) as file:
+    params = yaml.load(file, Loader=yaml.FullLoader)
+
 Dihedral = namedtuple(
     'Dihedral', ['atom1', 'atom2', 'atom3', 'atom4'])
 
+for k, v in params.items():
+    for a in ['phi', 'psi']:
+        assert (a in v), "Key %s is missing in inputs: %s " % (a, k)
+        atoms = []
+        for b in ['atom1', 'atom2', 'atom3', 'atom4']:
+            assert (b in v[a]), "Key %s is missing in inputs: %s %s" % (
+                b, k, a)
+            for c in ['segid', 'resid', 'name']:
+                assert (c in v[a][b]), \
+                        "Key %s is missing in inputs: %s %s %s " % (c, k, a, b)
+            atoms.append("(segid %s and resid %s and name %s)" %
+                         (v[a][b]['segid'], v[a][b]['resid'], v[a][b]['name']))
+        print(atoms)
+        if a == 'phi':
+            dihe_phi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+        if a == 'psi':
+            dihe_psi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+
 # order of dihedral atom is the crystallographic definition
 # (see glycanstructure.org)
 
-# phi
-atom1 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid1, args.iresid1, args.iname1)
-atom2 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid2, args.iresid2, args.iname2)
-atom3 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid3, args.iresid3, args.iname3)
-atom4 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid4, args.iresid4, args.iname4)
-
-dihe_phi = Dihedral(atom1, atom2, atom3, atom4)
-
-# psi
-atom1 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid5, args.iresid5, args.iname5)
-atom2 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid6, args.iresid6, args.iname6)
-atom3 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid7, args.iresid7, args.iname7)
-atom4 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid8, args.iresid8, args.iname8)
-
-dihe_psi = Dihedral(atom1, atom2, atom3, atom4)
+assert(dihe_phi), "phi dihedral doesn't exist"
+assert(dihe_psi), "psi dihedral doesn't exist"
 
 
 def calc_torsion(dihedral):
@@ -120,8 +102,11 @@
     writer.writerows(zip(phi_frame, phi_series, psi_series))
 
 with sns.axes_style("white"):
-    h = sns.jointplot(x=phi_series, y=psi_series, kind="kde", legend=True)
-    h.set_axis_labels(r'$\Phi$ (degrees)', r'$\Psi$ (degrees)')
+    h = sns.jointplot(x=phi_series, y=psi_series,
+                      kind="kde", space=0, legend=True)
+    h.set_axis_labels(r'$\phi$ (degrees)', r'$\psi$ (degrees)')
     h.ax_joint.set_xlim(-180, 180)
     h.ax_joint.set_ylim(-180, 180)
-    plt.savefig(args.oramachandran_plot, format='png')
+    h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+    h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+    plt.savefig(args.oramachandran_plot, format='png', bbox_inches='tight')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.yml	Mon Aug 24 16:37:11 2020 -0400
@@ -0,0 +1,36 @@
+ramachandran1:
+  phi:
+    atom1:
+     segid: HET
+     resid: 3
+     name: O5
+    atom2:
+     segid: HET
+     resid: 3
+     name: C1
+    atom3:
+     segid: HET
+     resid: 2
+     name: O4
+    atom4:
+     segid: HET
+     resid: 2
+     name: C4
+  psi:
+    atom1:
+     segid: HET
+     resid: 3
+     name: C1
+    atom2:
+     segid: HET
+     resid: 2
+     name: O4
+    atom3:
+     segid: HET
+     resid: 2
+     name: C4
+    atom4:
+     segid: HET
+     resid: 2
+     name: C5
+  comment: pick visually using VMD using labels. Go to labels, dihedral to see the information about resname resid and atomname and then lookup the segname for ach atom.