Galaxy |

Changeset 4:70a2d548e62c (2020-08-24)

Previous changeset 3:8bd0e29927da (2020-05-20) Next changeset 5:af9f01ca6a5c (2020-10-28)

Commit message:
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"

modified:
macros.xml
ramachandran_plots.py
ramachandran_plots.xml

added:
end-to-end.py
extract_rmsd.py
ramachandran_auto_protein.py
ramachandran_auto_protein_html.j2
test-data/test.yml

diff -r 8bd0e29927da -r 70a2d548e62c end-to-end.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/end-to-end.py Mon Aug 24 16:23:14 2020 -0400

[

@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import argparse
+import itertools
+import sys
+
+import MDAnalysis as mda
+
+import matplotlib
+import matplotlib.pyplot as plt
+
+import numpy as np
+import numpy.linalg
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--ilabel', help='plot label')
+    parser.add_argument('--ititle1', help='plot title')
+    parser.add_argument('--output1', help='output1 - timeseries')
+    parser.add_argument('--o_plot', help='End to End plot')
+    return parser.parse_args()
+
+
+args = parse_command_line(sys.argv)
+
+
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+
+ntermatoms = "(segid %s and name N)" % \
+    (args.isegid1)
+ctermatoms = "(segid %s and name C)" % \
+    (args.isegid1)
+# not sure how robust this selection really is
+nterm = u.select_atoms(ntermatoms)[0]  # first atom named N
+cterm = u.select_atoms(ctermatoms)[-1]  # takes the last atom named 'C'
+
+enddist = []
+
+for ts in u.trajectory:  # iterate through all frames
+    r = cterm.position - nterm.position  # e-to-e vector from atom positions
+    d = numpy.linalg.norm(r)   # end-to-end distance
+    enddist.append((ts.frame, d))
+
+enddist = np.array(enddist)
+
+
+color = itertools.cycle(['r', 'b', 'gold'])
+
+fig, axs = plt.subplots(1, 2, sharex=False, sharey=False, tight_layout=True)
+
+params = {
+    'axes.labelsize': 8,
+    'legend.fontsize': 10,
+    'xtick.labelsize': 10,
+    'ytick.labelsize': 10,
+    'text.usetex': False,
+    'figure.figsize': [4.5, 4.5],
+    'figure.dpi': 300
+}
+plt.rcParams.update(params)
+
+axs[0].plot(enddist[:, 0], enddist[:, 1], 'r-', lw=2, label=args.ilabel)
+axs[0].set_xlabel("number of frames")
+axs[0].set_ylabel(r"End to end distance  ($\AA$)")
+axs[0].legend()
+
+n, bins, patches = axs[1].hist(enddist[:, 1], color=next(
+    color), label=args.ilabel, alpha=0.5, density=True, stacked=True)
+
+axs[1].legend()
+axs[1].set_ylabel('Density Normalised Frequency')
+axs[1].set_xlabel(r'End to end distance ($\AA$)')
+fig.suptitle(args.ititle1, fontsize=12, fontweight='bold')
+fig.subplots_adjust(top=0.45)
+
+print(
+    " \n".join(
+        [
+            'The End to End distance is measured between the following atoms:',
+            str(nterm),
+            str(cterm)]))
+
+# svg is better but sticking with png for now
+plt.savefig(args.o_plot, format='png')
+
+
+np.savetxt(args.output1, enddist, delimiter='\t')

diff -r 8bd0e29927da -r 70a2d548e62c extract_rmsd.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_rmsd.py Mon Aug 24 16:23:14 2020 -0400

[

@@ -0,0 +1,129 @@
+import argparse
+import json
+
+import MDAnalysis as m
+from MDAnalysis.analysis import align, rms
+from MDAnalysis.analysis.base import AnalysisFromFunction
+from MDAnalysis.coordinates.memory import MemoryReader
+
+import numpy as np
+
+
+def calc_rmsd(str_files, traj_files, ref_str, str_format, traj_format,
+              ref_str_format, filepath_out, group, start, end, step,
+              fitting_atoms):
+    """
+    the function will cycle through range 0 to no_t and load all files found.
+
+    str_files: text file with filepaths for structures, one on each line
+    traj_files: text file with filepaths for trajectories, one on each line
+    ref_str: reference structure for fitting
+    filepath_in: directory where the files are located
+    filepath_out: pickle file where results (3D matrix) should be saved to
+
+    group: atoms for which RMSD should be calculated;
+        use the MDAnalysis selection language
+    fitting_atoms: atoms used for str alignment prior to RMSD calculation;
+        use the MDAnalysis selection language
+
+    start: first trajectory frame to calculate RMSD
+    end: last trajectory frame to calculate RMSD
+    step: how frequently frames are sampled between start and end; obviously,
+        the larger the step, the quicker the script finishes
+    """
+
+    # open list of files
+    with open(str_files) as f1, open(traj_files) as f2:
+        str_file_list = f1.read().strip().split('\n')
+        traj_file_list = f2.read().strip().split('\n')
+
+        if sum(1 for line in f1) != sum(1 for line in f2):
+            raise IOError('Number of structure and trajectory files unequal.')
+
+    no_t = len(traj_file_list)
+
+    data = np.zeros((no_t, no_t,
+                    int((end - start)/step + ((end - start) % step > 0))))
+
+    # load files
+    universes = {}
+
+    for traj in range(no_t):
+        mobile = m.Universe(str_file_list[traj], traj_file_list[traj],
+                            format=traj_format, topology_format=str_format)
+        ref = m.Universe(ref_str, topology_format=ref_str_format)
+
+        mobile.trajectory[-1]  # set mobile trajectory to last frame
+        ref.trajectory[0]  # set reference trajectory to first frame
+
+        # perform alignment
+        align.AlignTraj(mobile, ref, select=fitting_atoms,
+                        in_memory=True).run()
+
+        grp = mobile.select_atoms(group)
+        universes[traj] = m.core.universe.Merge(grp)  # create Universe w grp
+        coordinates = AnalysisFromFunction(lambda ag: ag.positions.copy(),
+                                           grp).run().results  # write to uv
+        universes[traj].load_new(coordinates, format=MemoryReader)
+
+    print("All trajs loaded by MDAnalysis")
+
+    # calculate differences
+    for traj1 in range(no_t):
+        print("Calculating differences for traj {}".format(traj1))
+        for traj2 in range(traj1):
+
+            u1 = universes[traj1]
+            u2 = universes[traj2]
+
+            l1 = u1.select_atoms(group)
+            l2 = u2.select_atoms(group)
+
+            rmsd = rms.RMSD(l1, l2)
+
+            rmsd.run()
+
+            data[traj1, traj2] = rmsd.rmsd[:, 2]
+            data[traj2, traj1] = rmsd.rmsd[:, 2]
+
+    with open(filepath_out, 'w') as f:
+        json.dump(data.tolist(), f, indent=4, sort_keys=True)
+
+    print("Done!")
+    return
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--trajs', required=True,
+                        help='File containing trajectory filepaths.')
+    parser.add_argument("--strs",
+                        help='File containing structure filepaths.')
+    parser.add_argument("--ref-str",
+                        help='File containing reference structure.')
+    parser.add_argument('--traj-format', required=True,
+                        help='Trajectory format.')
+    parser.add_argument("--str-format", help='Structure format.')
+    parser.add_argument("--ref-str-format",
+                        help='Reference structure format.')
+    parser.add_argument('-o', '--outfile',
+                        help="Path to the output JSON file")
+    parser.add_argument('--group', help="Atoms for which RMSD should be"
+                        "calculated in MDAnalysis selection language")
+    parser.add_argument('--fitting', help="Fitting atoms for alignment"
+                        "prior to RMSD calculation")
+    parser.add_argument('--start', type=int,
+                        help="First trajectory frame to calculate RMSD")
+    parser.add_argument('--end', type=int,
+                        help="Last trajectory frame to calculate RMSD")
+    parser.add_argument('--step', type=int,
+                        help="Frame sampling frequency for RMSD calculation")
+    args = parser.parse_args()
+
+    calc_rmsd(args.strs, args.trajs, args.ref_str, args.str_format,
+              args.traj_format, args.ref_str_format, args.outfile,
+              args.group, args.start, args.end, args.step, args.fitting)
+
+
+if __name__ == "__main__":
+    main()

diff -r 8bd0e29927da -r 70a2d548e62c macros.xml
--- a/macros.xml Wed May 20 13:03:30 2020 -0400
+++ b/macros.xml Mon Aug 24 16:23:14 2020 -0400

@@ -1,36 +1,36 @@
<macros>
-    <token name="@VERSION@">0.20</token>
+    <token name="@TOOL_VERSION@">1.0.0</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="0.20.1">mdanalysis</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">mdanalysis</requirement>
             <yield/>
         </requirements>
     </xml>
     <xml name="analysis_inputs">
-        <param format="dcd,xtc" name="trajin" type="data" label="DCD/XTC trajectory input"/>
-        <param format="pdb,gro" name="strin" type="data" label="PDB/GRO input"/>
+        <param format="dcd,xtc" name="trajin" type="data" label="DCD/XTC trajectory input" />
+        <param format="pdb,gro" name="strin" type="data" label="PDB/GRO input" />
         <yield/>
     </xml>
     <xml name="sanitizer">
         <sanitizer invalid_char="">
-            <valid initial="string.ascii_letters,string.digits"/>
-         </sanitizer>
-         <yield/>
+            <valid initial="string.ascii_letters,string.digits" />
+        </sanitizer>
+        <yield/>
     </xml>
     <xml name="sanitizer_resids">
         <sanitizer invalid_char="">
-            <valid initial="string.digits"/>
+            <valid initial="string.digits" />
         </sanitizer>
         <yield/>
     </xml>
     <xml name="tests_inputs">
-        <param name="trajin" value="test.dcd" ftype="dcd"/>
-        <param name="strin" value="test.pdb" ftype="pdb"/>
+        <param name="trajin" value="test.dcd" ftype="dcd" />
+        <param name="strin" value="test.pdb" ftype="pdb" />
         <yield/>
     </xml>
     <xml name="tests_inputs_gmx">
-        <param name="trajin" value="test.xtc" ftype="xtc"/>
-        <param name="strin" value="test.gro" ftype="gro"/>
+        <param name="trajin" value="test.xtc" ftype="xtc" />
+        <param name="strin" value="test.gro" ftype="gro" />
         <yield/>
     </xml>
     <xml name="citations">

diff -r 8bd0e29927da -r 70a2d548e62c ramachandran_auto_protein.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein.py Mon Aug 24 16:23:14 2020 -0400

[

@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+import argparse
+import base64
+import importlib
+import sys
+
+import MDAnalysis as mda
+from MDAnalysis.analysis.dihedrals import Ramachandran
+
+import h5py
+
+from jinja2 import Environment, FileSystemLoader
+
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+
+import numpy as np
+import numpy.linalg
+
+import seaborn as sns
+
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--iresid1', help='resid start')
+    parser.add_argument('--iresid2', help='resid end')
+    parser.add_argument('--iresname', help='resname e.g. ALA')
+    parser.add_argument('--igroupby', help='groupby names or ids')
+    parser.add_argument('--itemplatepath', help='template path')
+    parser.add_argument('--o_plot1', help='MDA Ramachandran plot')
+    parser.add_argument('--o_plot2', help='Seaborn Ramachandran plot')
+    parser.add_argument('--o_data1', help='Timeseries in HDF5 format')
+    parser.add_argument('--o_html1', help='Html overview output of all plots')
+    return parser.parse_args()
+
+
+args = parse_command_line(sys.argv)
+
+currentpath = "."
+if args.itemplatepath is not None:
+    currentpath = args.itemplatepath
+
+
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+selection = "(segid %s)" % \
+    (args.isegid1)
+
+if args.iresname is not None:
+    selection = "(segid %s and resname %s)" % \
+        (args.isegid1, args.iresname)
+
+if args.iresid1 is not None and args.iresid2 is not None:
+    assert(int(args.iresid1) > 0), "ResID numbering starts at 1 for this tool."
+    assert(int(args.iresid2) > 0), "ResID numbering starts at 1 for this tool."
+    assert(int(args.iresid2) > int(args.iresid1)
+           ), "ResID2 must be at least ResID1+1"
+    selection = "(segid %s and resid %s-%s)" % \
+        (args.isegid1, int(args.iresid1), int(args.iresid2))
+    if args.iresname is not None:
+        selection = "(segid %s and resid %s-%s and resname %s)" % \
+            (args.isegid1, int(args.iresid1), int(args.iresid2), args.iresname)
+
+r = u.select_atoms(selection)
+
+assert(r != u.select_atoms('name thiscannotpossiblyexist')
+       ), \
+               """The selection you specified returns an empty result.
+               Check segment names and residue ID's. Also check the
+                structure and trajectory file selected are the correct ones"""
+
+if args.igroupby is not None:
+    group_selections = {}  # dictionary of selections
+    if args.igroupby == 'name':
+        groupby = sorted(list(set(r.resnames)))
+        for e in groupby:
+            s = r & u.select_atoms("resname %s" % e)
+            this_sel = "%s and resname %s" % (selection, e)
+            group_selections[this_sel] = s
+    elif args.igroupby == 'id':
+        groupby = sorted(list(set(r.resids)))
+        for e in groupby:
+            s = r & u.select_atoms("resid %s" % e)
+            this_sel = "%s and resid %s" % (selection, e)
+            group_selections[this_sel] = s
+    else:
+        assert False, ("Invalid argument for igroupby. "
+                       "Only name and id are valid options.")
+
+
+def ramachandran_plot(atomgroup, selection, outputfile1, outputfile2,
+                      image_format='png'):
+    # plot standard mdanalysis and seaborn 2D with kde
+    R = Ramachandran(atomgroup).run()
+    fig, ax = plt.subplots(figsize=plt.figaspect(1))
+    R.plot(ax=ax, color='k', marker='.', ref=True)
+
+    a = R.angles.reshape(np.prod(R.angles.shape[:2]), 2)
+    # open hdf file
+    with h5py.File(args.o_data1, 'a') as f:
+        setname = "%s" % (selection)
+        f["/" + setname + "/ramachandran/phi"] = a[:, 0]
+        f["/" + setname + "/ramachandran/psi"] = a[:, 1]
+    plt.tight_layout()
+    # svg is better but sticking with png for now
+    plt.savefig(outputfile1, format=image_format)
+
+    sns.reset_defaults()
+    importlib.reload(plt)
+    importlib.reload(sns)
+    with sns.axes_style("white"):
+        h = sns.jointplot(x=a[:, 0], y=a[:, 1],
+                          kind="kde", space=0)
+        h.set_axis_labels(r'$\phi$ (deg)', r'$\psi$ (deg)')
+        h.ax_joint.set_xlim(-180, 180)
+        h.ax_joint.set_ylim(-180, 180)
+        h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+        h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+        plt.savefig(outputfile2, format=image_format, bbox_inches='tight')
+
+
+def get_base64_encoded_image(image_path):
+    """  encode image to string for use in html later"""
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode('utf-8')
+
+
+plots = []
+if args.igroupby is not None:
+    for k, v in group_selections.items():
+        print(k, v)
+        try:
+            ramachandran_plot(v, str(k), "ramachandran1" +
+                              str(k), "ramachandran2" + str(k))
+            plots.append({'Name': "%s" % (k), 'plot1':
+                         get_base64_encoded_image("ramachandran1" + str(k)),
+                         'plot2': get_base64_encoded_image("ramachandran2"
+                          + str(k))})
+        except Exception as einstance:
+            print(type(einstance))
+            print(einstance.args)
+            print(einstance)
+
+ramachandran_plot(r, selection, args.o_plot1, args.o_plot2)
+plots.insert(0, {'Name': selection, 'plot1': get_base64_encoded_image(
+    args.o_plot1), 'plot2': get_base64_encoded_image(args.o_plot2)})
+
+template_environment = Environment(loader=FileSystemLoader(
+    currentpath), lstrip_blocks=True, trim_blocks=True)
+template = template_environment.get_template(
+    'ramachandran_auto_protein_html.j2')
+with open(args.o_html1, 'w+') as f:
+    f.write(template.render(title="Ramachandran Plots", plots=plots))

diff -r 8bd0e29927da -r 70a2d548e62c ramachandran_auto_protein_html.j2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein_html.j2 Mon Aug 24 16:23:14 2020 -0400

[

@@ -0,0 +1,25 @@
+<html>
+
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+  <title>{{ title }}</title>
+</head>
+
+<body>
+  <table>
+    <tr>
+      <th>Selection</th>
+      <th>Ramachandran scatter plot</th>
+      <th>Ramachandran histogram </th>
+    </tr>
+    {% for plot in plots %}
+    <tr>
+      <td>{{ plot['Name'] }}</td>
+      <td style="vertical-align:center"><img src="data:image/png;base64,{{plot['plot1']}}" /> </td>
+      <td style="vertical-align:center"><img src="data:image/png;base64,{{plot['plot2']}}" /> </td>
+    </tr>
+    {% endfor %}
+  </table>
+</body>
+
+</html>

diff -r 8bd0e29927da -r 70a2d548e62c ramachandran_plots.py
--- a/ramachandran_plots.py Wed May 20 13:03:30 2020 -0400
+++ b/ramachandran_plots.py Mon Aug 24 16:23:14 2020 -0400

[

@@ -10,11 +10,16 @@

import matplotlib
import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+

import numpy as np

import seaborn as sns

+
+import yaml
+
matplotlib.use('Agg')  # noqa

@@ -24,30 +29,7 @@
     parser.add_argument('--istr', help='input str')
     parser.add_argument('--itrajext', help='input traj ext')
     parser.add_argument('--istrext', help='input str ext')
-    parser.add_argument('--isegid1', help='segid 1')
-    parser.add_argument('--iresid1', help='resid 1')
-    parser.add_argument('--iname1', help='name 1')
-    parser.add_argument('--isegid2', help='segid 2')
-    parser.add_argument('--iresid2', help='resid 2')
-    parser.add_argument('--iname2', help='name 2')
-    parser.add_argument('--isegid3', help='segid 3')
-    parser.add_argument('--iresid3', help='resid 3')
-    parser.add_argument('--iname3', help='name 3')
-    parser.add_argument('--isegid4', help='segid 4')
-    parser.add_argument('--iresid4', help='resid 4')
-    parser.add_argument('--iname4', help='name 4')
-    parser.add_argument('--isegid5', help='segid 1')
-    parser.add_argument('--iresid5', help='resid 1')
-    parser.add_argument('--iname5', help='name 1')
-    parser.add_argument('--isegid6', help='segid 2')
-    parser.add_argument('--iresid6', help='resid 2')
-    parser.add_argument('--iname6', help='name 2')
-    parser.add_argument('--isegid7', help='segid 3')
-    parser.add_argument('--iresid7', help='resid 3')
-    parser.add_argument('--iname7', help='name 3')
-    parser.add_argument('--isegid8', help='segid 4')
-    parser.add_argument('--iresid8', help='resid 4')
-    parser.add_argument('--iname8', help='name 4')
+    parser.add_argument('--iyml', help='input in yml format')
     parser.add_argument('--output', help='output')
     parser.add_argument('--oramachandran_plot', help='dihedral plot')
     return parser.parse_args()
@@ -55,35 +37,35 @@

args = parse_command_line(sys.argv)

+with open(args.iyml) as file:
+    params = yaml.load(file, Loader=yaml.FullLoader)
+
Dihedral = namedtuple(
     'Dihedral', ['atom1', 'atom2', 'atom3', 'atom4'])

+for k, v in params.items():
+    for a in ['phi', 'psi']:
+        assert (a in v), "Key %s is missing in inputs: %s " % (a, k)
+        atoms = []
+        for b in ['atom1', 'atom2', 'atom3', 'atom4']:
+            assert (b in v[a]), "Key %s is missing in inputs: %s %s" % (
+                b, k, a)
+            for c in ['segid', 'resid', 'name']:
+                assert (c in v[a][b]), \
+                        "Key %s is missing in inputs: %s %s %s " % (c, k, a, b)
+            atoms.append("(segid %s and resid %s and name %s)" %
+                         (v[a][b]['segid'], v[a][b]['resid'], v[a][b]['name']))
+        print(atoms)
+        if a == 'phi':
+            dihe_phi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+        if a == 'psi':
+            dihe_psi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+
# order of dihedral atom is the crystallographic definition
# (see glycanstructure.org)

-# phi
-atom1 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid1, args.iresid1, args.iname1)
-atom2 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid2, args.iresid2, args.iname2)
-atom3 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid3, args.iresid3, args.iname3)
-atom4 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid4, args.iresid4, args.iname4)
-
-dihe_phi = Dihedral(atom1, atom2, atom3, atom4)
-
-# psi
-atom1 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid5, args.iresid5, args.iname5)
-atom2 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid6, args.iresid6, args.iname6)
-atom3 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid7, args.iresid7, args.iname7)
-atom4 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid8, args.iresid8, args.iname8)
-
-dihe_psi = Dihedral(atom1, atom2, atom3, atom4)
+assert(dihe_phi), "phi dihedral doesn't exist"
+assert(dihe_psi), "psi dihedral doesn't exist"

def calc_torsion(dihedral):
@@ -120,8 +102,11 @@
     writer.writerows(zip(phi_frame, phi_series, psi_series))

with sns.axes_style("white"):
-    h = sns.jointplot(x=phi_series, y=psi_series, kind="kde", legend=True)
-    h.set_axis_labels(r'$\Phi$ (degrees)', r'$\Psi$ (degrees)')
+    h = sns.jointplot(x=phi_series, y=psi_series,
+                      kind="kde", space=0, legend=True)
+    h.set_axis_labels(r'$\phi$ (degrees)', r'$\psi$ (degrees)')
     h.ax_joint.set_xlim(-180, 180)
     h.ax_joint.set_ylim(-180, 180)
-    plt.savefig(args.oramachandran_plot, format='png')
+    h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+    h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+    plt.savefig(args.oramachandran_plot, format='png', bbox_inches='tight')

diff -r 8bd0e29927da -r 70a2d548e62c ramachandran_plots.xml
--- a/ramachandran_plots.xml Wed May 20 13:03:30 2020 -0400
+++ b/ramachandran_plots.xml Mon Aug 24 16:23:14 2020 -0400

[

b'@@ -1,199 +1,64 @@\n-<tool id="mdanalysis_ramachandran_plot" name="Ramachandran Plots" version="@VERSION@">\n+<tool id="mdanalysis_ramachandran_plot" name="Ramachandran Plots" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">\n <description>- calculate and plot the distribution of two dihedrals in a trajectory</description>\n <macros>\n <import>macros.xml</import>\n+ <token name="@GALAXY_VERSION@">0</token>\n </macros>\n <expand macro="requirements">\n- <requirement type="package" version="1.3.1">scipy</requirement>\n- <requirement type="package" version="0.9.0">seaborn</requirement>\n- <requirement type="package" version="1.1.0">nbdime</requirement>\n+ <requirement type="package" version="1.5.2">scipy</requirement>\n+ <requirement type="package" version="0.10.0">seaborn</requirement>\n+ <requirement type="package" version="2.0.0">nbdime</requirement>\n+ <requirement type="package" version="5.3.1">pyyaml</requirement>\n </expand>\n- <command detect_errors="exit_code">\n-<![CDATA[\n+ <command detect_errors="exit_code"><![CDATA[\n python \'$__tool_directory__/ramachandran_plots.py\'\n- --itraj \'$trajin\' \n+ --itraj \'$trajin\'\n --istr \'$strin\'\n --itrajext \'$trajin.ext\'\n --istrext \'$strin.ext\'\n- --isegid1 \'$phi.phi_segid1\'\n- --iresid1 \'$phi.phi_resid1\' \n- --iname1 \'$phi.phi_name1\'\n- --isegid2 \'$phi.phi_segid2\'\n- --iresid2 \'$phi.phi_resid2\' \n- --iname2 \'$phi.phi_name2\'\n- --isegid3 \'$phi.phi_segid3\'\n- --iresid3 \'$phi.phi_resid3\' \n- --iname3 \'$phi.phi_name3\'\n- --isegid4 \'$phi.phi_segid4\'\n- --iresid4 \'$phi.phi_resid4\' \n- --iname4 \'$phi.phi_name4\'\n- --isegid5 \'$psi.psi_segid1\'\n- --iresid5 \'$psi.psi_resid1\' \n- --iname5 \'$psi.psi_name1\'\n- --isegid6 \'$psi.psi_segid2\'\n- --iresid6 \'$psi.psi_resid2\' \n- --iname6 \'$psi.psi_name2\' \n- --isegid7 \'$psi.psi_segid3\' \n- --iresid7 \'$psi.psi_resid3\' \n- --iname7 \'$psi.psi_name3\' \n- --isegid8 \'$psi.psi_segid4\' \n- --iresid8 \'$psi.psi_resid4\' \n- --iname8 \'$psi.psi_name4\' \n- --output \'$output\' \n+ --iyml \'$ymlin\'\n+ --output \'$output\'\n --oramachandran_plot \'$ramachandran_plot\'\n 2>&1\n ]]></command>\n <inputs>\n- <expand macro="analysis_inputs"/>\n- <section name="phi" title="Phi" expanded="False">\n- <param name="phi_segid1" type="text" value="HET" label="Segment ID of atom 1">\n- <expand macro="sanitizer"/>\n- </param>\n- <param name="phi_resid1" type="text" value="3" label="Residue ID of atom 1">\n- <expand macro="sanitizer_resids"/>\n- </param>\n- <param name="phi_name1" type="text" value="O5" label="Atom name of atom 1">\n- <expand macro="sanitizer"/>\n- </param>\n- <param name="phi_segid2" type="text" value="HET" label="Segment ID of atom 2">\n- <expand macro="sanitizer"/>\n- </param>\n- <param name="phi_resid2" type="text" value="3" label="Residue ID of atom 2">\n- <expand macro="sanitizer_resids"/>\n- </param>\n- <param name="phi_name2" type="text" value="C1" label="Atom name of atom 2">\n- <expand macro="sanitizer"/>\n- </param>\n- <param name="phi_segid3" type="text" value="HET" label="Segment ID of atom 3">\n- <expand macro="sanitizer"/>\n- </param>\n- <param name="phi_resid3" type="text" value="2" label="Residue ID of atom 3">\n- <expand macro="sanitizer_resids"/>\n- </param>\n- <param name="phi_name3" type="text" value="O4" label="Atom name of atom 3">\n- <expand macro="sanitizer"/>\n- </param>\n- <param name="phi_segid4" type="text" value="HET" label="Segment ID of atom 4">\n- <expand ma'..b'</tests>\n <help><![CDATA[\n .. class:: infomark\n \n **What it does**\n- \n+\n A Ramachandran plot ([\xcf\x86,\xcf\x88] plot) was originally developed as a way to visualize the energetically allowed regions for backbone dihedral angles \xcf\x88 and \xcf\x86 of an amino acid.\n-It can be also used to calculate glycosidic \xcf\x86 and \xcf\x88 angles formed between carbohydrates. This tool can calculate and plot the histogram (Ramachandran plot) of user-defined \xcf\x86 and \xcf\x88 angles of a trajectory. \n+It can be also used to calculate glycosidic \xcf\x86 and \xcf\x88 angles formed between carbohydrates. This tool can calculate and plot the histogram (Ramachandran plot) of user-defined \xcf\x86 and \xcf\x88 angles of a trajectory.\n \n- - For protein \xcf\x86 and \xcf\x88 dihedral definitions see https://proteinstructures.com/Structure/Structure/Ramachandran-plot.html. \n- - For glycan \xcf\x86 and \xcf\x88 dihedral definitions see http://www.glycanstructure.org/\n \n _____\n \n@@ -204,13 +69,59 @@\n \n - Trajectory file (DCD).\n - PDB file.\n- - Segment IDs, residue IDs and names of the four atoms to calculate dihedrals.\n+ - Text file in YAML format with Segment IDs, residue IDs and names of the four atoms to calculate dihedrals.\n \n Note that a MDAnalysis \'segment\' is a larger organizational unit, for example one protein or all the solvent molecules or simply the whole system.\n \n+ - For protein \xcf\x86 and \xcf\x88 dihedral definitions see https://proteinstructures.com/Structure/Structure/Ramachandran-plot.html\n+ - For glycan \xcf\x86 and \xcf\x88 dihedral definitions, see\n+ - `Glycan Structure Website`_ - The glycosidic torsion angle definition is adopted from the crystallographic definition; O5-C1-O1-C\'x (\xce\xa6; phi), C1-O1-C\'x-C\'x-1 (\xce\xa8; psi), and O1-C\'6-C\'5-O\'5 (\xcf\x89; omega). The torsion angle between the first residue of the N-glycan chain and the side chain of the asparagine residue is defined as O5-C1-N\'D2-C\'G (\xce\xa6; phi) and C1-N\'D2-C\'G-C\'B (\xce\xa8; psi). The torsion angle between the first residue of the O-glycan chain and the side chain of the serine residue is defined as O5-C1-O\'G-C\'B (\xce\xa6; phi) and C1-O\'G-C\'B-C\'A (\xce\xa8; psi). For threonine residue, OG1 is used instead of OG. The atom names are based on the CHARMM topology.\n+ - `Glycosciences Website`_ - NMR definition - \xce\xa6 phi: H1-C1-O1-C\xe2\x80\xb2X \xce\xa8 psi: C1-O1-C\xe2\x80\xb2X-H\xe2\x80\xb2X \xcf\x89 omega: O1-C\xe2\x80\xb26-C\xe2\x80\xb25-H\xe2\x80\xb25 Crystallographic definition - \xce\xa6 phi: O5-C1-O1-C\xe2\x80\xb2X \xce\xa8 psi: C1-O1-C\xe2\x80\xb2X-C\xe2\x80\xb2X+1 \xcf\x89 omega: O1-C\xe2\x80\xb26-C\xe2\x80\xb25-O\xe2\x80\xb25\n+\n+ - An example of a yaml formatted selection for \xcf\x86-\xcf\x88 of a small glycoprotein called PROF with a carbohydrate portion called CARA where \xcf\x86=O5-C1-OG1-CB1 and \xcf\x88=C1-OG1-CB-CA for the selected segment and residue ids.\n+\n+ .. code-block:: yaml\n+\n+ ramachandran1:\n+ phi:\n+ atom1:\n+ segid: CARA\n+ resid: 1\n+ name: O5\n+ atom2:\n+ segid: CARA\n+ resid: 1\n+ name: C1\n+ atom3:\n+ segid: PROF\n+ resid: 4\n+ name: OG1\n+ atom4:\n+ segid: PROF\n+ resid: 4\n+ name: CB\n+ psi:\n+ atom1:\n+ segid: CARA\n+ resid: 1\n+ name: C1\n+ atom2:\n+ segid: PROF\n+ resid: 4\n+ name: OG1\n+ atom3:\n+ segid: PROF\n+ resid: 4\n+ name: CB\n+ atom4:\n+ segid: PROF\n+ resid: 4\n+ name: CA\n+ comment: pick visually using VMD using labels. Go to labels, dihedral to see the information about resname resid and atomname and then lookup the segname for ach atom.\n+\n _____\n \n- \n+\n .. class:: infomark\n \n **Output**\n@@ -218,7 +129,8 @@\n - Tab-separated file of raw data of the \xcf\x86,\xcf\x88 angles over time.\n - Image (as png) of the Ramachandran plot.\n \n-\n+ .. _`Glycan Structure Website`: http://www.glycanstructure.org/fragment-db/howto\n+ .. _`Glycosciences Website`: http://www.glycosciences.de/tools/glytorsion/\n ]]></help>\n <expand macro="citations" />\n </tool>\n'

diff -r 8bd0e29927da -r 70a2d548e62c test-data/test.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.yml Mon Aug 24 16:23:14 2020 -0400

@@ -0,0 +1,36 @@
+ramachandran1:
+  phi:
+    atom1:
+     segid: HET
+     resid: 3
+     name: O5
+    atom2:
+     segid: HET
+     resid: 3
+     name: C1
+    atom3:
+     segid: HET
+     resid: 2
+     name: O4
+    atom4:
+     segid: HET
+     resid: 2
+     name: C4
+  psi:
+    atom1:
+     segid: HET
+     resid: 3
+     name: C1
+    atom2:
+     segid: HET
+     resid: 2
+     name: O4
+    atom3:
+     segid: HET
+     resid: 2
+     name: C4
+    atom4:
+     segid: HET
+     resid: 2
+     name: C5
+  comment: pick visually using VMD using labels. Go to labels, dihedral to see the information about resname resid and atomname and then lookup the segname for ach atom.