changeset 1:8f6ad93973cb draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 45fe75a3a8ca80f799c85e194429c4c7f38bb5f6"
author chemteam
date Wed, 28 Oct 2020 21:41:04 +0000 (2020-10-28)
parents 743bd6aa3c7a
children 589f8ef21e58
files extract_rmsd.py extract_rmsd.xml
diffstat 2 files changed, 22 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/extract_rmsd.py	Mon Aug 24 16:41:41 2020 -0400
+++ b/extract_rmsd.py	Wed Oct 28 21:41:04 2020 +0000
@@ -2,29 +2,23 @@
 import json
 
 import MDAnalysis as m
-from MDAnalysis.analysis import align, rms
-from MDAnalysis.analysis.base import AnalysisFromFunction
-from MDAnalysis.coordinates.memory import MemoryReader
+from MDAnalysis.analysis import rms
 
 import numpy as np
 
 
-def calc_rmsd(str_files, traj_files, ref_str, str_format, traj_format,
-              ref_str_format, filepath_out, group, start, end, step,
-              fitting_atoms):
+def calc_rmsd(str_files, traj_files, str_format, traj_format, filepath_out,
+              group, start, end, step):
     """
     the function will cycle through range 0 to no_t and load all files found.
 
     str_files: text file with filepaths for structures, one on each line
     traj_files: text file with filepaths for trajectories, one on each line
-    ref_str: reference structure for fitting
     filepath_in: directory where the files are located
     filepath_out: pickle file where results (3D matrix) should be saved to
 
     group: atoms for which RMSD should be calculated;
         use the MDAnalysis selection language
-    fitting_atoms: atoms used for str alignment prior to RMSD calculation;
-        use the MDAnalysis selection language
 
     start: first trajectory frame to calculate RMSD
     end: last trajectory frame to calculate RMSD
@@ -49,22 +43,10 @@
     universes = {}
 
     for traj in range(no_t):
-        mobile = m.Universe(str_file_list[traj], traj_file_list[traj],
-                            format=traj_format, topology_format=str_format)
-        ref = m.Universe(ref_str, topology_format=ref_str_format)
-
-        mobile.trajectory[-1]  # set mobile trajectory to last frame
-        ref.trajectory[0]  # set reference trajectory to first frame
-
-        # perform alignment
-        align.AlignTraj(mobile, ref, select=fitting_atoms,
-                        in_memory=True).run()
-
-        grp = mobile.select_atoms(group)
-        universes[traj] = m.core.universe.Merge(grp)  # create Universe w grp
-        coordinates = AnalysisFromFunction(lambda ag: ag.positions.copy(),
-                                           grp).run().results  # write to uv
-        universes[traj].load_new(coordinates, format=MemoryReader)
+        # We no longer align here, users should do this themselves.
+        universes[traj] = m.Universe(str_file_list[traj], traj_file_list[traj],
+                                     format=traj_format,
+                                     topology_format=str_format)
 
     print("All trajs loaded by MDAnalysis")
 
@@ -72,19 +54,14 @@
     for traj1 in range(no_t):
         print("Calculating differences for traj {}".format(traj1))
         for traj2 in range(traj1):
-
-            u1 = universes[traj1]
-            u2 = universes[traj2]
-
-            l1 = u1.select_atoms(group)
-            l2 = u2.select_atoms(group)
-
-            rmsd = rms.RMSD(l1, l2)
-
-            rmsd.run()
-
-            data[traj1, traj2] = rmsd.rmsd[:, 2]
-            data[traj2, traj1] = rmsd.rmsd[:, 2]
+            for frame in range(data.shape[2]):
+                universes[traj1].trajectory[frame]
+                universes[traj2].trajectory[frame]
+                A = universes[traj1].select_atoms(group).positions
+                B = universes[traj2].select_atoms(group).positions
+                r = rms.rmsd(A, B)
+                data[traj1, traj2, frame] = r
+                data[traj2, traj1, frame] = r
 
     with open(filepath_out, 'w') as f:
         json.dump(data.tolist(), f, indent=4, sort_keys=True)
@@ -99,19 +76,13 @@
                         help='File containing trajectory filepaths.')
     parser.add_argument("--strs",
                         help='File containing structure filepaths.')
-    parser.add_argument("--ref-str",
-                        help='File containing reference structure.')
     parser.add_argument('--traj-format', required=True,
                         help='Trajectory format.')
     parser.add_argument("--str-format", help='Structure format.')
-    parser.add_argument("--ref-str-format",
-                        help='Reference structure format.')
     parser.add_argument('-o', '--outfile',
                         help="Path to the output JSON file")
     parser.add_argument('--group', help="Atoms for which RMSD should be"
                         "calculated in MDAnalysis selection language")
-    parser.add_argument('--fitting', help="Fitting atoms for alignment"
-                        "prior to RMSD calculation")
     parser.add_argument('--start', type=int,
                         help="First trajectory frame to calculate RMSD")
     parser.add_argument('--end', type=int,
@@ -120,9 +91,9 @@
                         help="Frame sampling frequency for RMSD calculation")
     args = parser.parse_args()
 
-    calc_rmsd(args.strs, args.trajs, args.ref_str, args.str_format,
-              args.traj_format, args.ref_str_format, args.outfile,
-              args.group, args.start, args.end, args.step, args.fitting)
+    calc_rmsd(args.strs, args.trajs, args.str_format,
+              args.traj_format, args.outfile,
+              args.group, args.start, args.end, args.step)
 
 
 if __name__ == "__main__":
--- a/extract_rmsd.xml	Mon Aug 24 16:41:41 2020 -0400
+++ b/extract_rmsd.xml	Wed Oct 28 21:41:04 2020 +0000
@@ -2,7 +2,7 @@
     <description>from MD ensemble with MDAnalysis</description>
     <macros>
         <import>macros.xml</import>
-        <token name="@GALAXY_VERSION@">0</token>
+        <token name="@GALAXY_VERSION@">1</token>
     </macros>
     <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
@@ -16,13 +16,10 @@
         python '$__tool_directory__/extract_rmsd.py'
             --trajs trajs.txt
             --strs strs.txt
-            --ref-str '$refstr'
             --traj-format '$trajs[0].ext'
             --str-format '$strs[0].ext'
-            --ref-str-format '${refstr.ext}'
             --outfile '$output'
             --group '$group'
-            --fitting '$fitting'
             --start '$start'
             --end '$end'
             --step '$step'
@@ -31,9 +28,7 @@
     <inputs>
         <param type="data_collection" name="strs" label="Input structures" format="pdb,gro"/>
         <param type="data_collection" name="trajs" label="Input trajectories" format="xtc,dcd,trr"/>
-        <param name="refstr" type="data" format="pdb,gro" label="Reference structure" help="Structure for aligning all trajectories against."/>
         <param name='group' type='text' label='Group for RMSD calculation' />
-        <param name='fitting' type='text' label='Group for alignment prior to RMSD calculation' />
         <param name="start" type="integer" min="0" value="0" label="First trajectory frame for RMSD calculation" />
         <param name="end" type="integer"  min="0" value="0" label="End trajectory frame for RMSD calculation" />
         <param name="step" type="integer"  min="1" value="1" label="Frequency of trajectory frame sampling for RMSD calculation" />
@@ -56,8 +51,6 @@
                 </collection>
             </param>
 
-            <param name="refstr" ftype="pdb" value="test.pdb" />
-            <param name="fitting" value="protein" />
             <param name="group" value="resname BGLC" />
             <param name="start" value="0" />
             <param name="end" value="15" />
@@ -65,7 +58,7 @@
             <output name="output"> 
                 <assert_contents>
                     <has_text text="0.0" n="20"/>
-                    <has_size value="1588" />
+                    <has_size value="1126" />
                     <has_n_lines n="74" />
                 </assert_contents>
             </output>
@@ -81,6 +74,8 @@
   - calculates RMSD differences for a selected group of atoms between all possible pairs of trajectories at all time points
   - returns RMSD data as a three-dimensional tensor.
 
+Note: in an older version of this tool trajectories were aligned to a reference group prior to RMSD calculation. This is no longer supported; you should perform alignment yourself using a more efficient tool such as 'Modify/convert GROMACS trajectories'.
+
 _____