changeset 4:70a2d548e62c draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
author chemteam
date Mon, 24 Aug 2020 16:23:14 -0400
parents 8bd0e29927da
children af9f01ca6a5c
files end-to-end.py extract_rmsd.py macros.xml ramachandran_auto_protein.py ramachandran_auto_protein_html.j2 ramachandran_plots.py ramachandran_plots.xml test-data/test.yml
diffstat 8 files changed, 579 insertions(+), 233 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/end-to-end.py	Mon Aug 24 16:23:14 2020 -0400
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import argparse
+import itertools
+import sys
+
+import MDAnalysis as mda
+
+import matplotlib
+import matplotlib.pyplot as plt
+
+import numpy as np
+import numpy.linalg
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    """Parse the options of the end-to-end distance script.
+
+    argv: complete argument vector with the program name first, as in
+        sys.argv.
+
+    Returns the argparse.Namespace with one attribute per option; options
+    that were not supplied are None.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--ilabel', help='plot label')
+    parser.add_argument('--ititle1', help='plot title')
+    parser.add_argument('--output1', help='output1 - timeseries')
+    parser.add_argument('--o_plot', help='End to End plot')
+    # Parse the vector that was handed in (minus the program name) instead
+    # of silently falling back to the global sys.argv.
+    return parser.parse_args(argv[1:])
+
+
+# This file is a flat script: options are parsed and the analysis runs at
+# import time.
+args = parse_command_line(sys.argv)
+
+
+# Load structure and trajectory; both formats are passed explicitly.
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+
+# Selection strings for all atoms named N / C in the requested segment.
+ntermatoms = "(segid %s and name N)" % \
+    (args.isegid1)
+ctermatoms = "(segid %s and name C)" % \
+    (args.isegid1)
+# not sure how robust this selection really is
+nterm = u.select_atoms(ntermatoms)[0]  # first atom named N
+cterm = u.select_atoms(ctermatoms)[-1]  # takes the last atom named 'C'
+
+# Collected as (frame number, distance) pairs, one per trajectory frame.
+enddist = []
+
+for ts in u.trajectory:  # iterate through all frames
+    r = cterm.position - nterm.position  # e-to-e vector from atom positions
+    d = numpy.linalg.norm(r)   # end-to-end distance
+    enddist.append((ts.frame, d))
+
+# Two-column array: column 0 is the frame number, column 1 the distance.
+enddist = np.array(enddist)
+
+
+# Only the first colour of the cycle is consumed (by the histogram below).
+color = itertools.cycle(['r', 'b', 'gold'])
+
+# Left panel: time series; right panel: histogram of the distances.
+fig, axs = plt.subplots(1, 2, sharex=False, sharey=False, tight_layout=True)
+
+# NOTE(review): these rcParams are applied after plt.subplots() has already
+# created the figure above -- confirm that the figure size / dpi settings
+# are meant to (and do) affect this figure.
+params = {
+    'axes.labelsize': 8,
+    'legend.fontsize': 10,
+    'xtick.labelsize': 10,
+    'ytick.labelsize': 10,
+    'text.usetex': False,
+    'figure.figsize': [4.5, 4.5],
+    'figure.dpi': 300
+}
+plt.rcParams.update(params)
+
+# Distance against frame number.
+axs[0].plot(enddist[:, 0], enddist[:, 1], 'r-', lw=2, label=args.ilabel)
+axs[0].set_xlabel("number of frames")
+axs[0].set_ylabel(r"End to end distance  ($\AA$)")
+axs[0].legend()
+
+# Density-normalised histogram of the distances.
+n, bins, patches = axs[1].hist(enddist[:, 1], color=next(
+    color), label=args.ilabel, alpha=0.5, density=True, stacked=True)
+
+axs[1].legend()
+axs[1].set_ylabel('Density Normalised Frequency')
+axs[1].set_xlabel(r'End to end distance ($\AA$)')
+fig.suptitle(args.ititle1, fontsize=12, fontweight='bold')
+fig.subplots_adjust(top=0.45)
+
+# Report on stdout which two atoms were used as chain ends.
+print(
+    " \n".join(
+        [
+            'The End to End distance is measured between the following atoms:',
+            str(nterm),
+            str(cterm)]))
+
+# svg is better but sticking with png for now
+plt.savefig(args.o_plot, format='png')
+
+
+# Tab-separated time series: frame number, distance.
+np.savetxt(args.output1, enddist, delimiter='\t')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_rmsd.py	Mon Aug 24 16:23:14 2020 -0400
@@ -0,0 +1,129 @@
+import argparse
+import json
+
+import MDAnalysis as m
+from MDAnalysis.analysis import align, rms
+from MDAnalysis.analysis.base import AnalysisFromFunction
+from MDAnalysis.coordinates.memory import MemoryReader
+
+import numpy as np
+
+
+def calc_rmsd(str_files, traj_files, ref_str, str_format, traj_format,
+              ref_str_format, filepath_out, group, start, end, step,
+              fitting_atoms):
+    """
+    Align every trajectory to a reference structure, then compute the RMSD
+    between each pair of trajectories and save the result as JSON.
+
+    str_files: text file with filepaths for structures, one on each line
+    traj_files: text file with filepaths for trajectories, one on each line
+    ref_str: reference structure for fitting
+    str_format: format of the structure files
+    traj_format: format of the trajectory files
+    ref_str_format: format of the reference structure
+    filepath_out: JSON file where results (3D matrix indexed by
+        trajectory, trajectory, frame) should be saved to
+
+    group: atoms for which RMSD should be calculated;
+        use the MDAnalysis selection language
+    fitting_atoms: atoms used for str alignment prior to RMSD calculation;
+        use the MDAnalysis selection language
+
+    start: first trajectory frame to calculate RMSD
+    end: last trajectory frame to calculate RMSD
+    step: how frequently frames are sampled between start and end; obviously,
+        the larger the step, the quicker the script finishes
+
+    Raises IOError if the two list files name a different number of files.
+    """
+
+    # open list of files
+    with open(str_files) as f1, open(traj_files) as f2:
+        str_file_list = f1.read().strip().split('\n')
+        traj_file_list = f2.read().strip().split('\n')
+
+    # Compare the parsed lists: both handles are at EOF after read(), so
+    # counting their remaining lines would always compare 0 with 0.
+    if len(str_file_list) != len(traj_file_list):
+        raise IOError('Number of structure and trajectory files unequal.')
+
+    no_t = len(traj_file_list)
+
+    # Last axis holds one slot per sampled frame: ceil((end - start) / step).
+    # NOTE(review): start, end and step are only used here to size the
+    # array; they are not passed to the alignment or RMSD runs below --
+    # confirm the frame counts agree for partial ranges.
+    data = np.zeros((no_t, no_t,
+                    int((end - start)/step + ((end - start) % step > 0))))
+
+    # load files
+    universes = {}
+
+    for traj in range(no_t):
+        mobile = m.Universe(str_file_list[traj], traj_file_list[traj],
+                            format=traj_format, topology_format=str_format)
+        ref = m.Universe(ref_str, topology_format=ref_str_format)
+
+        mobile.trajectory[-1]  # set mobile trajectory to last frame
+        ref.trajectory[0]  # set reference trajectory to first frame
+
+        # perform alignment
+        align.AlignTraj(mobile, ref, select=fitting_atoms,
+                        in_memory=True).run()
+
+        grp = mobile.select_atoms(group)
+        universes[traj] = m.core.universe.Merge(grp)  # create Universe w grp
+        coordinates = AnalysisFromFunction(lambda ag: ag.positions.copy(),
+                                           grp).run().results  # write to uv
+        universes[traj].load_new(coordinates, format=MemoryReader)
+
+    print("All trajs loaded by MDAnalysis")
+
+    # calculate differences; only pairs with traj2 < traj1 are computed and
+    # the result is mirrored, the diagonal stays zero
+    for traj1 in range(no_t):
+        print("Calculating differences for traj {}".format(traj1))
+        for traj2 in range(traj1):
+
+            u1 = universes[traj1]
+            u2 = universes[traj2]
+
+            l1 = u1.select_atoms(group)
+            l2 = u2.select_atoms(group)
+
+            rmsd = rms.RMSD(l1, l2)
+
+            rmsd.run()
+
+            # column 2 of the result table is stored for both orderings
+            data[traj1, traj2] = rmsd.rmsd[:, 2]
+            data[traj2, traj1] = rmsd.rmsd[:, 2]
+
+    with open(filepath_out, 'w') as f:
+        json.dump(data.tolist(), f, indent=4, sort_keys=True)
+
+    print("Done!")
+
+
+def main():
+    """Read the command-line options and run calc_rmsd with them."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--trajs', required=True,
+                        help='File containing trajectory filepaths.')
+    parser.add_argument("--strs",
+                        help='File containing structure filepaths.')
+    parser.add_argument("--ref-str",
+                        help='File containing reference structure.')
+    parser.add_argument('--traj-format', required=True,
+                        help='Trajectory format.')
+    parser.add_argument("--str-format", help='Structure format.')
+    parser.add_argument("--ref-str-format",
+                        help='Reference structure format.')
+    parser.add_argument('-o', '--outfile',
+                        help="Path to the output JSON file")
+    # Adjacent string literals are concatenated as-is, so the first part
+    # needs a trailing space to keep the help text readable.
+    parser.add_argument('--group', help="Atoms for which RMSD should be "
+                        "calculated in MDAnalysis selection language")
+    parser.add_argument('--fitting', help="Fitting atoms for alignment "
+                        "prior to RMSD calculation")
+    parser.add_argument('--start', type=int,
+                        help="First trajectory frame to calculate RMSD")
+    parser.add_argument('--end', type=int,
+                        help="Last trajectory frame to calculate RMSD")
+    parser.add_argument('--step', type=int,
+                        help="Frame sampling frequency for RMSD calculation")
+    args = parser.parse_args()
+
+    calc_rmsd(args.strs, args.trajs, args.ref_str, args.str_format,
+              args.traj_format, args.ref_str_format, args.outfile,
+              args.group, args.start, args.end, args.step, args.fitting)
+
+
+if __name__ == "__main__":
+    main()
--- a/macros.xml	Wed May 20 13:03:30 2020 -0400
+++ b/macros.xml	Mon Aug 24 16:23:14 2020 -0400
@@ -1,36 +1,36 @@
 <macros>
-    <token name="@VERSION@">0.20</token>
+    <token name="@TOOL_VERSION@">1.0.0</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="0.20.1">mdanalysis</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">mdanalysis</requirement>
             <yield/>
         </requirements>
     </xml>
     <xml name="analysis_inputs">
-        <param format="dcd,xtc" name="trajin" type="data" label="DCD/XTC trajectory input"/>
-        <param format="pdb,gro" name="strin" type="data" label="PDB/GRO input"/>
+        <param format="dcd,xtc" name="trajin" type="data" label="DCD/XTC trajectory input" />
+        <param format="pdb,gro" name="strin" type="data" label="PDB/GRO input" />
         <yield/>
     </xml>
     <xml name="sanitizer">
         <sanitizer invalid_char="">
-            <valid initial="string.ascii_letters,string.digits"/>
-         </sanitizer>
-         <yield/>
+            <valid initial="string.ascii_letters,string.digits" />
+        </sanitizer>
+        <yield/>
     </xml>
     <xml name="sanitizer_resids">
         <sanitizer invalid_char="">
-            <valid initial="string.digits"/>
+            <valid initial="string.digits" />
         </sanitizer>
         <yield/>
     </xml>
     <xml name="tests_inputs">
-        <param name="trajin" value="test.dcd" ftype="dcd"/>
-        <param name="strin" value="test.pdb" ftype="pdb"/>
+        <param name="trajin" value="test.dcd" ftype="dcd" />
+        <param name="strin" value="test.pdb" ftype="pdb" />
         <yield/>
     </xml>
     <xml name="tests_inputs_gmx">
-        <param name="trajin" value="test.xtc" ftype="xtc"/>
-        <param name="strin" value="test.gro" ftype="gro"/>
+        <param name="trajin" value="test.xtc" ftype="xtc" />
+        <param name="strin" value="test.gro" ftype="gro" />
         <yield/>
     </xml>
     <xml name="citations">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein.py	Mon Aug 24 16:23:14 2020 -0400
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+import argparse
+import base64
+import importlib
+import sys
+
+import MDAnalysis as mda
+from MDAnalysis.analysis.dihedrals import Ramachandran
+
+import h5py
+
+from jinja2 import Environment, FileSystemLoader
+
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+
+import numpy as np
+import numpy.linalg
+
+import seaborn as sns
+
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    """Parse the options of the automatic protein Ramachandran script.
+
+    argv: complete argument vector with the program name first, as in
+        sys.argv.
+
+    Returns the argparse.Namespace with one attribute per option; options
+    that were not supplied are None.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--iresid1', help='resid start')
+    parser.add_argument('--iresid2', help='resid end')
+    parser.add_argument('--iresname', help='resname e.g. ALA')
+    parser.add_argument('--igroupby', help='groupby names or ids')
+    parser.add_argument('--itemplatepath', help='template path')
+    parser.add_argument('--o_plot1', help='MDA Ramachandran plot')
+    parser.add_argument('--o_plot2', help='Seaborn Ramachandran plot')
+    parser.add_argument('--o_data1', help='Timeseries in HDF5 format')
+    parser.add_argument('--o_html1', help='Html overview output of all plots')
+    # Parse the vector that was handed in (minus the program name) instead
+    # of silently falling back to the global sys.argv.
+    return parser.parse_args(argv[1:])
+
+
+# This file is a flat script: options are parsed and the selections are
+# built at import time.
+args = parse_command_line(sys.argv)
+
+# directory searched for the HTML template; defaults to the working directory
+currentpath = "."
+if args.itemplatepath is not None:
+    currentpath = args.itemplatepath
+
+
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+
+# Assemble the selection from the segment plus the optional residue range
+# and residue name; the clause order is always segid, resid, resname.
+clauses = ["segid %s" % args.isegid1]
+
+# The residue range is only applied when both ends are given.
+if args.iresid1 is not None and args.iresid2 is not None:
+    first_resid = int(args.iresid1)
+    last_resid = int(args.iresid2)
+    # Explicit raises instead of assert: assert statements are removed when
+    # Python runs with -O, which would disable the input validation.
+    if first_resid <= 0 or last_resid <= 0:
+        raise ValueError("ResID numbering starts at 1 for this tool.")
+    if last_resid <= first_resid:
+        raise ValueError("ResID2 must be at least ResID1+1")
+    clauses.append("resid %s-%s" % (first_resid, last_resid))
+
+if args.iresname is not None:
+    clauses.append("resname %s" % args.iresname)
+
+selection = "(%s)" % " and ".join(clauses)
+
+r = u.select_atoms(selection)
+
+# An empty atom group means the selection matched nothing.
+if len(r) == 0:
+    raise ValueError(
+               """The selection you specified returns an empty result.
+               Check segment names and residue ID's. Also check the
+                structure and trajectory file selected are the correct ones""")
+
+if args.igroupby is not None:
+    group_selections = {}  # dictionary of selections
+    # keyword is the selection keyword used to split r into sub-groups
+    if args.igroupby == 'name':
+        keyword = "resname"
+        groupby = sorted(set(r.resnames))
+    elif args.igroupby == 'id':
+        keyword = "resid"
+        groupby = sorted(set(r.resids))
+    else:
+        raise ValueError("Invalid argument for igroupby. "
+                         "Only name and id are valid options.")
+    # one entry per distinct value: selection text -> matching atoms of r
+    for e in groupby:
+        s = r & u.select_atoms("%s %s" % (keyword, e))
+        this_sel = "%s and %s %s" % (selection, keyword, e)
+        group_selections[this_sel] = s
+
+
+def ramachandran_plot(atomgroup, selection, outputfile1, outputfile2,
+                      image_format='png'):
+    """Plot the phi/psi angles of atomgroup twice and store the raw angles.
+
+    atomgroup: atoms handed to the MDAnalysis Ramachandran analysis
+    selection: text used as the HDF5 group name for the stored angles
+    outputfile1: path of the MDAnalysis scatter plot
+    outputfile2: path of the seaborn KDE joint plot
+    image_format: format passed to savefig for both images
+
+    The angles are added to the HDF5 file named by the module-level
+    args.o_data1. All matplotlib figures are closed before returning, so
+    repeated calls (one per group) do not accumulate open figures.
+    """
+    try:
+        # plot standard mdanalysis and seaborn 2D with kde
+        R = Ramachandran(atomgroup).run()
+        fig, ax = plt.subplots(figsize=plt.figaspect(1))
+        R.plot(ax=ax, color='k', marker='.', ref=True)
+
+        # merge the two leading axes of R.angles into rows of two columns;
+        # column 0 is stored as phi and column 1 as psi below
+        a = R.angles.reshape(np.prod(R.angles.shape[:2]), 2)
+        # open hdf file
+        with h5py.File(args.o_data1, 'a') as f:
+            setname = "%s" % (selection)
+            f["/" + setname + "/ramachandran/phi"] = a[:, 0]
+            f["/" + setname + "/ramachandran/psi"] = a[:, 1]
+        plt.tight_layout()
+        # svg is better but sticking with png for now
+        plt.savefig(outputfile1, format=image_format)
+
+        sns.reset_defaults()
+        importlib.reload(plt)
+        importlib.reload(sns)
+        with sns.axes_style("white"):
+            h = sns.jointplot(x=a[:, 0], y=a[:, 1],
+                              kind="kde", space=0)
+            h.set_axis_labels(r'$\phi$ (deg)', r'$\psi$ (deg)')
+            h.ax_joint.set_xlim(-180, 180)
+            h.ax_joint.set_ylim(-180, 180)
+            h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+            h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+            plt.savefig(outputfile2, format=image_format, bbox_inches='tight')
+    finally:
+        # release every figure, also when an analysis or plot step failed
+        plt.close('all')
+
+
+def get_base64_encoded_image(image_path):
+    """Return the bytes of the file at image_path as base64 text."""
+    with open(image_path, "rb") as handle:
+        raw_bytes = handle.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode('utf-8')
+
+
+# One dictionary per row of the HTML overview table.
+plots = []
+if args.igroupby is not None:
+    for sel_text, sel_atoms in group_selections.items():
+        print(sel_text, sel_atoms)
+        # per-group image files are written to the working directory
+        scatter_file = "ramachandran1" + str(sel_text)
+        kde_file = "ramachandran2" + str(sel_text)
+        try:
+            ramachandran_plot(sel_atoms, str(sel_text), scatter_file, kde_file)
+            row = {'Name': "%s" % (sel_text)}
+            row['plot1'] = get_base64_encoded_image(scatter_file)
+            row['plot2'] = get_base64_encoded_image(kde_file)
+            plots.append(row)
+        except Exception as einstance:
+            # a failing group is reported on stdout and skipped
+            print(type(einstance))
+            print(einstance.args)
+            print(einstance)
+
+# The plot of the complete selection always becomes the first table row.
+ramachandran_plot(r, selection, args.o_plot1, args.o_plot2)
+overview = {'Name': selection,
+            'plot1': get_base64_encoded_image(args.o_plot1),
+            'plot2': get_base64_encoded_image(args.o_plot2)}
+plots.insert(0, overview)
+
+# Render the overview page from the template found in currentpath.
+template_loader = FileSystemLoader(currentpath)
+template_environment = Environment(loader=template_loader,
+                                   lstrip_blocks=True, trim_blocks=True)
+template = template_environment.get_template(
+    'ramachandran_auto_protein_html.j2')
+html_text = template.render(title="Ramachandran Plots", plots=plots)
+with open(args.o_html1, 'w+') as html_out:
+    html_out.write(html_text)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein_html.j2	Mon Aug 24 16:23:14 2020 -0400
@@ -0,0 +1,25 @@
+<html>
+
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+  <title>{{ title }}</title>
+</head>
+
+<body>
+  <table>
+    <tr>
+      <th>Selection</th>
+      <th>Ramachandran scatter plot</th>
+      <th>Ramachandran histogram </th>
+    </tr>
+    {% for plot in plots %}
+    <tr>
+      <td>{{ plot['Name'] }}</td>
+      <td style="vertical-align:middle"><img src="data:image/png;base64,{{plot['plot1']}}" /> </td>
+      <td style="vertical-align:middle"><img src="data:image/png;base64,{{plot['plot2']}}" /> </td>
+    </tr>
+    {% endfor %}
+  </table>
+</body>
+
+</html>
--- a/ramachandran_plots.py	Wed May 20 13:03:30 2020 -0400
+++ b/ramachandran_plots.py	Mon Aug 24 16:23:14 2020 -0400
@@ -10,11 +10,16 @@
 
 import matplotlib
 import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+
 
 import numpy as np
 
 import seaborn as sns
 
+
+import yaml
+
 matplotlib.use('Agg')  # noqa
 
 
@@ -24,30 +29,7 @@
     parser.add_argument('--istr', help='input str')
     parser.add_argument('--itrajext', help='input traj ext')
     parser.add_argument('--istrext', help='input str ext')
-    parser.add_argument('--isegid1', help='segid 1')
-    parser.add_argument('--iresid1', help='resid 1')
-    parser.add_argument('--iname1', help='name 1')
-    parser.add_argument('--isegid2', help='segid 2')
-    parser.add_argument('--iresid2', help='resid 2')
-    parser.add_argument('--iname2', help='name 2')
-    parser.add_argument('--isegid3', help='segid 3')
-    parser.add_argument('--iresid3', help='resid 3')
-    parser.add_argument('--iname3', help='name 3')
-    parser.add_argument('--isegid4', help='segid 4')
-    parser.add_argument('--iresid4', help='resid 4')
-    parser.add_argument('--iname4', help='name 4')
-    parser.add_argument('--isegid5', help='segid 1')
-    parser.add_argument('--iresid5', help='resid 1')
-    parser.add_argument('--iname5', help='name 1')
-    parser.add_argument('--isegid6', help='segid 2')
-    parser.add_argument('--iresid6', help='resid 2')
-    parser.add_argument('--iname6', help='name 2')
-    parser.add_argument('--isegid7', help='segid 3')
-    parser.add_argument('--iresid7', help='resid 3')
-    parser.add_argument('--iname7', help='name 3')
-    parser.add_argument('--isegid8', help='segid 4')
-    parser.add_argument('--iresid8', help='resid 4')
-    parser.add_argument('--iname8', help='name 4')
+    parser.add_argument('--iyml', help='input in yml format')
     parser.add_argument('--output', help='output')
     parser.add_argument('--oramachandran_plot', help='dihedral plot')
     return parser.parse_args()
@@ -55,35 +37,35 @@
 
 args = parse_command_line(sys.argv)
 
+# SECURITY: the torsion definition is a user-supplied file. safe_load only
+# builds plain YAML types (mappings, lists, scalars), which is all the
+# validation below expects; this replaces yaml.load with FullLoader.
+with open(args.iyml) as file:
+    params = yaml.safe_load(file)
+
 Dihedral = namedtuple(
     'Dihedral', ['atom1', 'atom2', 'atom3', 'atom4'])
 
+# Validate the parsed YAML and turn each torsion into four selection strings.
+# Expected nesting: <entry> -> phi/psi -> atom1..atom4 -> segid/resid/name.
+# NOTE(review): dihe_phi / dihe_psi are reassigned for every top-level entry,
+# so with several entries only the last one is kept -- confirm that a single
+# entry is the intended input.
+for k, v in params.items():
+    for a in ['phi', 'psi']:
+        assert (a in v), "Key %s is missing in inputs: %s " % (a, k)
+        atoms = []
+        for b in ['atom1', 'atom2', 'atom3', 'atom4']:
+            assert (b in v[a]), "Key %s is missing in inputs: %s %s" % (
+                b, k, a)
+            for c in ['segid', 'resid', 'name']:
+                assert (c in v[a][b]), \
+                        "Key %s is missing in inputs: %s %s %s " % (c, k, a, b)
+            # one MDAnalysis selection string per atom of the torsion
+            atoms.append("(segid %s and resid %s and name %s)" %
+                         (v[a][b]['segid'], v[a][b]['resid'], v[a][b]['name']))
+        print(atoms)
+        if a == 'phi':
+            dihe_phi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+        if a == 'psi':
+            dihe_psi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+            dihe_psi = Dihedral(atoms[0], atoms[1], atoms[2], atoms[3])
+
 # order of dihedral atom is the crystallographic definition
 # (see glycanstructure.org)
 
-# phi
-atom1 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid1, args.iresid1, args.iname1)
-atom2 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid2, args.iresid2, args.iname2)
-atom3 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid3, args.iresid3, args.iname3)
-atom4 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid4, args.iresid4, args.iname4)
-
-dihe_phi = Dihedral(atom1, atom2, atom3, atom4)
-
-# psi
-atom1 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid5, args.iresid5, args.iname5)
-atom2 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid6, args.iresid6, args.iname6)
-atom3 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid7, args.iresid7, args.iname7)
-atom4 = "(segid %s and resid %s and name %s)" % \
-    (args.isegid8, args.iresid8, args.iname8)
-
-dihe_psi = Dihedral(atom1, atom2, atom3, atom4)
+assert(dihe_phi), "phi dihedral doesn't exist"
+assert(dihe_psi), "psi dihedral doesn't exist"
 
 
 def calc_torsion(dihedral):
@@ -120,8 +102,11 @@
     writer.writerows(zip(phi_frame, phi_series, psi_series))
 
 with sns.axes_style("white"):
-    h = sns.jointplot(x=phi_series, y=psi_series, kind="kde", legend=True)
-    h.set_axis_labels(r'$\Phi$ (degrees)', r'$\Psi$ (degrees)')
+    h = sns.jointplot(x=phi_series, y=psi_series,
+                      kind="kde", space=0, legend=True)
+    h.set_axis_labels(r'$\phi$ (degrees)', r'$\psi$ (degrees)')
     h.ax_joint.set_xlim(-180, 180)
     h.ax_joint.set_ylim(-180, 180)
-    plt.savefig(args.oramachandran_plot, format='png')
+    h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+    h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+    plt.savefig(args.oramachandran_plot, format='png', bbox_inches='tight')
--- a/ramachandran_plots.xml	Wed May 20 13:03:30 2020 -0400
+++ b/ramachandran_plots.xml	Mon Aug 24 16:23:14 2020 -0400
@@ -1,199 +1,64 @@
-<tool id="mdanalysis_ramachandran_plot" name="Ramachandran Plots" version="@VERSION@">
+<tool id="mdanalysis_ramachandran_plot" name="Ramachandran Plots" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
     <description>- calculate and plot the distribution of two dihedrals in a trajectory</description>
     <macros>
         <import>macros.xml</import>
+        <token name="@GALAXY_VERSION@">0</token>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="1.3.1">scipy</requirement>
-        <requirement type="package" version="0.9.0">seaborn</requirement>
-        <requirement type="package" version="1.1.0">nbdime</requirement>
+        <requirement type="package" version="1.5.2">scipy</requirement>
+        <requirement type="package" version="0.10.0">seaborn</requirement>
+        <requirement type="package" version="2.0.0">nbdime</requirement>
+        <requirement type="package" version="5.3.1">pyyaml</requirement>
     </expand>
-    <command detect_errors="exit_code">
-<![CDATA[
+    <command detect_errors="exit_code"><![CDATA[
     python '$__tool_directory__/ramachandran_plots.py'
-        --itraj '$trajin' 
+        --itraj '$trajin'
         --istr '$strin'
         --itrajext '$trajin.ext'
         --istrext '$strin.ext'
-        --isegid1  '$phi.phi_segid1'
-        --iresid1  '$phi.phi_resid1' 
-        --iname1   '$phi.phi_name1'
-        --isegid2  '$phi.phi_segid2'
-        --iresid2  '$phi.phi_resid2' 
-        --iname2   '$phi.phi_name2'
-        --isegid3  '$phi.phi_segid3'
-        --iresid3  '$phi.phi_resid3' 
-        --iname3   '$phi.phi_name3'
-        --isegid4  '$phi.phi_segid4'
-        --iresid4  '$phi.phi_resid4' 
-        --iname4   '$phi.phi_name4'
-        --isegid5  '$psi.psi_segid1'
-        --iresid5  '$psi.psi_resid1' 
-        --iname5   '$psi.psi_name1'
-        --isegid6  '$psi.psi_segid2'
-        --iresid6  '$psi.psi_resid2' 
-        --iname6   '$psi.psi_name2' 
-        --isegid7  '$psi.psi_segid3' 
-        --iresid7  '$psi.psi_resid3' 
-        --iname7   '$psi.psi_name3' 
-        --isegid8  '$psi.psi_segid4' 
-        --iresid8  '$psi.psi_resid4' 
-        --iname8   '$psi.psi_name4' 
-        --output   '$output'  
+        --iyml '$ymlin'
+        --output   '$output'
         --oramachandran_plot '$ramachandran_plot'
     2>&1
 ]]></command>
     <inputs>
-        <expand macro="analysis_inputs"/>
-        <section name="phi" title="Phi" expanded="False">
-          <param name="phi_segid1"  type="text" value="HET" label="Segment ID of atom 1">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_resid1"  type="text" value="3" label="Residue ID of atom 1">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="phi_name1"  type="text" value="O5" label="Atom name of atom 1">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_segid2"  type="text" value="HET" label="Segment ID of atom 2">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_resid2"  type="text" value="3" label="Residue ID of atom 2">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="phi_name2"  type="text" value="C1" label="Atom name of atom 2">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_segid3"  type="text" value="HET" label="Segment ID of atom 3">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_resid3"  type="text" value="2" label="Residue ID of atom 3">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="phi_name3"  type="text" value="O4" label="Atom name of atom 3">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_segid4"  type="text" value="HET" label="Segment ID of atom 4">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="phi_resid4"  type="text" value="2" label="Residue ID of atom 4">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="phi_name4"  type="text" value="C4" label="Atom name of atom 4">
-            <expand macro="sanitizer"/>
-          </param>
-        </section>
-        <section name="psi" title="Psi" expanded="False">
-          <param name="psi_segid1"  type="text" value="HET" label="Segment ID of atom 1">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_resid1"  type="text" value="3" label="Residue ID of atom 1">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="psi_name1"  type="text" value="C1" label="Atom name of atom 1">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_segid2"  type="text" value="HET" label="Segment ID of atom 2">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_resid2"  type="text" value="2" label="Residue ID of atom 2">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="psi_name2"  type="text" value="O4" label="Atom name of atom 2">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_segid3"  type="text" value="HET" label="Segment ID of atom 3">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_resid3"  type="text" value="2" label="Residue ID of atom 3">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="psi_name3"  type="text" value="C4" label="Atom name of atom 3">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_segid4"  type="text" value="HET" label="Segment ID of atom 4">
-            <expand macro="sanitizer"/>
-          </param>
-          <param name="psi_resid4"  type="text" value="2" label="Residue ID of atom 4">
-            <expand macro="sanitizer_resids"/>
-          </param>
-          <param name="psi_name4"  type="text" value="C3" label="Atom name of atom 4">
-            <expand macro="sanitizer"/>
-          </param>
+        <expand macro="analysis_inputs" />
+        <section name="ymlparam" title="Torsion Parameters" expanded="False">
+            <param format="txt" name="ymlin" type="data" label="Torsion parameters in yml format" help="Information in YAML format about the segid, resid, and name for atoms 1-4 of the phi and psi angles. Example formatting included in the main help text"/>
         </section>
     </inputs>
     <outputs>
-        <data format="tabular" name="output" label="Ramachandran Plot raw data"/>
-        <data format="png" name="ramachandran_plot" label="Ramachandran Plot"/>
+        <data format="tabular" name="output" label="Ramachandran Plot raw data" />
+        <data format="png" name="ramachandran_plot" label="Ramachandran Plot" />
     </outputs>
     <tests>
         <test>
-            <expand macro="tests_inputs"/>
-            <param name="phi_segid1" value="HET"/>
-            <param name="phi_resid1" value="3"/>
-            <param name="phi_name1" value="O5"/>
-            <param name="phi_segid2" value="HET"/>
-            <param name="phi_resid2" value="3"/>
-            <param name="phi_name2" value="C1"/>
-            <param name="phi_segid3" value="HET"/>
-            <param name="phi_resid3" value="2"/>
-            <param name="phi_name3" value="O4"/>
-            <param name="phi_segid4" value="HET"/>
-            <param name="phi_resid4" value="2"/>
-            <param name="phi_name4" value="C4"/>
-            <param name="psi_segid1" value="HET"/>
-            <param name="psi_resid1" value="3"/>
-            <param name="psi_name1" value="C1"/>
-            <param name="psi_segid2" value="HET"/>
-            <param name="psi_resid2" value="2"/>
-            <param name="psi_name2" value="O4"/>
-            <param name="psi_segid3" value="HET"/>
-            <param name="psi_resid3" value="2"/>
-            <param name="psi_name3" value="C4"/>
-            <param name="psi_segid4" value="HET"/>
-            <param name="psi_resid4" value="2"/>
-            <param name="psi_name4" value="C3"/>
-            <output name="output" file="Ramachandran_Plot_raw_data.tabular" />
-        </test>
-        <test>
-            <expand macro="tests_inputs_gmx"/>
-            <param name="phi_segid1" value="SYSTEM"/>
-            <param name="phi_resid1" value="3"/>
-            <param name="phi_name1" value="O5"/>
-            <param name="phi_segid2" value="SYSTEM"/>
-            <param name="phi_resid2" value="3"/>
-            <param name="phi_name2" value="C1"/>
-            <param name="phi_segid3" value="SYSTEM"/>
-            <param name="phi_resid3" value="2"/>
-            <param name="phi_name3" value="O4"/>
-            <param name="phi_segid4" value="SYSTEM"/>
-            <param name="phi_resid4" value="2"/>
-            <param name="phi_name4" value="C4"/>
-            <param name="psi_segid1" value="SYSTEM"/>
-            <param name="psi_resid1" value="3"/>
-            <param name="psi_name1" value="C1"/>
-            <param name="psi_segid2" value="SYSTEM"/>
-            <param name="psi_resid2" value="2"/>
-            <param name="psi_name2" value="O4"/>
-            <param name="psi_segid3" value="SYSTEM"/>
-            <param name="psi_resid3" value="2"/>
-            <param name="psi_name3" value="C4"/>
-            <param name="psi_segid4" value="SYSTEM"/>
-            <param name="psi_resid4" value="2"/>
-            <param name="psi_name4" value="C3"/>
-            <output name="output" file="Ramachandran_Plot_raw_data_gmx.tabular" />
+            <param name="trajin" value="test.dcd" ftype="dcd" />
+            <param name="strin" value="test.pdb" ftype="pdb" />
+            <section name="ymlparam">
+                <param name="ymlin" value="test.yml" />
+            </section>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="-144.503" />
+                    <has_text text="-140.553" />
+                </assert_contents>
+            </output>
+            <output name="ramachandran_plot">
+                <assert_contents>
+                    <has_size value="18522" delta="2000" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
 
 **What it does**
-        
+
 A Ramachandran plot ([φ,ψ] plot) was originally developed as a way to visualize the energetically allowed regions for backbone dihedral angles ψ and φ of an amino acid.
-It can be also used to calculate glycosidic φ and ψ angles formed between carbohydrates. This tool can calculate and plot the histogram (Ramachandran plot) of user-defined φ and ψ angles of a trajectory. 
+It can also be used to calculate glycosidic φ and ψ angles formed between carbohydrates. This tool can calculate and plot the histogram (Ramachandran plot) of user-defined φ and ψ angles of a trajectory.
 
-  - For protein φ and ψ dihedral definitions see https://proteinstructures.com/Structure/Structure/Ramachandran-plot.html. 
-  - For glycan φ and ψ dihedral definitions see http://www.glycanstructure.org/
 
 _____
 
@@ -204,13 +69,59 @@
 
        - Trajectory file  (DCD).
        - PDB file.
-       - Segment IDs, residue IDs and names of the four atoms to calculate dihedrals.
+       - Text file in YAML format with Segment IDs, residue IDs and names of the four atoms to calculate dihedrals.
 
 Note that an MDAnalysis 'segment' is a larger organizational unit, for example one protein or all the solvent molecules or simply the whole system.
 
+  - For protein φ and ψ dihedral definitions see https://proteinstructures.com/Structure/Structure/Ramachandran-plot.html
+  - For glycan φ and ψ dihedral definitions, see
+     - `Glycan Structure Website`_ - The glycosidic torsion angle definition is adopted from the crystallographic definition; O5-C1-O1-C'x (Φ; phi), C1-O1-C'x-C'x-1 (Ψ; psi), and O1-C'6-C'5-O'5 (ω; omega). The torsion angle between the first residue of the N-glycan chain and the side chain of the asparagine residue is defined as O5-C1-N'D2-C'G (Φ; phi) and C1-N'D2-C'G-C'B (Ψ; psi). The torsion angle between the first residue of the O-glycan chain and the side chain of the serine residue is defined as O5-C1-O'G-C'B (Φ; phi) and C1-O'G-C'B-C'A (Ψ; psi). For threonine residue, OG1 is used instead of OG. The atom names are based on the CHARMM topology.
+     - `Glycosciences Website`_ - NMR definition: Φ (phi) = H1-C1-O1-C′X, Ψ (psi) = C1-O1-C′X-H′X, ω (omega) = O1-C′6-C′5-H′5. Crystallographic definition: Φ (phi) = O5-C1-O1-C′X, Ψ (psi) = C1-O1-C′X-C′X+1, ω (omega) = O1-C′6-C′5-O′5.
+
+  - An example of a YAML-formatted selection for φ-ψ of a small glycoprotein called PROF with a carbohydrate portion called CARA, where φ=O5-C1-OG1-CB and ψ=C1-OG1-CB-CA for the selected segment and residue IDs.
+
+    .. code-block:: yaml
+
+      ramachandran1:
+        phi:
+          atom1:
+           segid: CARA
+           resid: 1
+           name: O5
+          atom2:
+           segid: CARA
+           resid: 1
+           name: C1
+          atom3:
+           segid: PROF
+           resid: 4
+           name: OG1
+          atom4:
+           segid: PROF
+           resid: 4
+           name: CB
+        psi:
+          atom1:
+           segid: CARA
+           resid: 1
+           name: C1
+          atom2:
+           segid: PROF
+           resid: 4
+           name: OG1
+          atom3:
+           segid: PROF
+           resid: 4
+           name: CB
+          atom4:
+           segid: PROF
+           resid: 4
+           name: CA
+        comment: pick visually using VMD using labels. Go to labels, dihedral to see the information about resname, resid and atomname, and then look up the segname for each atom.
+
 _____
 
-        
+
 .. class:: infomark
 
 **Output**
@@ -218,7 +129,8 @@
        - Tab-separated file of raw data of the φ,ψ angles over time.
        - Image (as png) of the Ramachandran plot.
 
-
+    .. _`Glycan Structure Website`: http://www.glycanstructure.org/fragment-db/howto
+    .. _`Glycosciences Website`: http://www.glycosciences.de/tools/glytorsion/
     ]]></help>
     <expand macro="citations" />
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.yml	Mon Aug 24 16:23:14 2020 -0400
@@ -0,0 +1,36 @@
+ramachandran1:
+  phi:
+    atom1:
+     segid: HET
+     resid: 3
+     name: O5
+    atom2:
+     segid: HET
+     resid: 3
+     name: C1
+    atom3:
+     segid: HET
+     resid: 2
+     name: O4
+    atom4:
+     segid: HET
+     resid: 2
+     name: C4
+  psi:
+    atom1:
+     segid: HET
+     resid: 3
+     name: C1
+    atom2:
+     segid: HET
+     resid: 2
+     name: O4
+    atom3:
+     segid: HET
+     resid: 2
+     name: C4
+    atom4:
+     segid: HET
+     resid: 2
+     name: C5
+  comment: pick visually using VMD using labels. Go to labels, dihedral to see the information about resname, resid and atomname, and then look up the segname for each atom.