view extract_rmsd.xml @ 1:8f6ad93973cb draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 45fe75a3a8ca80f799c85e194429c4c7f38bb5f6"
author chemteam
date Wed, 28 Oct 2020 21:41:04 +0000
parents 743bd6aa3c7a
children 589f8ef21e58
line wrap: on
line source

<tool id="mdanalysis_extract_rmsd" name="Extract RMSD distance matrix data" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
    <description>from MD ensemble with MDAnalysis</description>
    <!-- Shared definitions are imported from macros.xml. @GALAXY_VERSION@ is the wrapper
         revision that the tool's version attribute appends after "+galaxy";
         @TOOL_VERSION@ is not defined in this file (presumably it comes from macros.xml). -->
    <macros>
        <import>macros.xml</import>
        <token name="@GALAXY_VERSION@">1</token>
    </macros>
    <expand macro="requirements"/>
    <!-- Job script: append the path of every dataset in both input collections to a
         plain-text list file (one path per line), then pass those list files and the
         remaining form values to extract_rmsd.py. -->
    <command detect_errors="exit_code"><![CDATA[
        ## Each path is single-quoted so that whitespace or glob characters cannot split
        ## or expand it, and a plain stdout append is used instead of the bash-only
        ## combined stdout/stderr append, which could leak error text into the lists.
        #for $structure in $strs:
            echo '$structure' >> ./strs.txt &&
        #end for
        #for $trajectory in $trajs:
            echo '$trajectory' >> ./trajs.txt &&
        #end for

        ## The file formats are taken from the first element of each collection.
        python '$__tool_directory__/extract_rmsd.py'
            --trajs trajs.txt
            --strs strs.txt
            --traj-format '$trajs[0].ext'
            --str-format '$strs[0].ext'
            --outfile '$output'
            --group '$group'
            --start '$start'
            --end '$end'
            --step '$step'
    ]]></command>
    <!-- Tool form: the structure and trajectory collections, the atom group used for the
         RMSD calculation, and the frame window (start, end, step). -->
    <inputs>
        <param name="strs" type="data_collection" format="pdb,gro" label="Input structures"/>
        <param name="trajs" type="data_collection" format="xtc,dcd,trr" label="Input trajectories"/>
        <param name="group" type="text" label="Group for RMSD calculation"/>
        <param name="start" type="integer" value="0" min="0" label="First trajectory frame for RMSD calculation"/>
        <param name="end" type="integer" value="0" min="0" label="End trajectory frame for RMSD calculation"/>
        <param name="step" type="integer" value="1" min="1" label="Frequency of trajectory frame sampling for RMSD calculation"/>
    </inputs>
    <!-- Single JSON dataset; its path is handed to the script as the outfile argument. -->
    <outputs>
        <data name="output" format="json" />
    </outputs>
    <!-- Functional test: two-element list collections built from the same test.gro and
         test.xtc files, with the RMSD computed for the "resname BGLC" selection. -->
    <tests>
        <test>
            <!-- Input collections -->
            <param name="strs">
                <collection type="list">
                    <element name="str1" ftype="gro" value="test.gro"/>
                    <element name="str2" ftype="gro" value="test.gro"/>
                </collection>
            </param>
            <param name="trajs">
                <collection type="list">
                    <element name="traj1" ftype="xtc" value="test.xtc"/>
                    <element name="traj2" ftype="xtc" value="test.xtc"/>
                </collection>
            </param>
            <!-- Atom selection and frame window -->
            <param name="group" value="resname BGLC"/>
            <param name="start" value="0"/>
            <param name="end" value="15"/>
            <param name="step" value="1"/>
            <!-- Expected shape of the JSON output -->
            <output name="output">
                <assert_contents>
                    <has_text text="0.0" n="20"/>
                    <has_size value="1126"/>
                    <has_n_lines n="74"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool takes collections of MD structures and trajectories as inputs and performs the following steps:
  - samples each trajectory between the selected start and end frames at the selected frame step
  - calculates RMSD differences for a selected group of atoms between all possible pairs of trajectories at all time points
  - returns RMSD data as a three-dimensional tensor.

Note: in an older version of this tool trajectories were aligned to a reference group prior to RMSD calculation. This is no longer supported; you should perform alignment yourself using a more efficient tool such as 'Modify/convert GROMACS trajectories'.

_____


.. class:: infomark

**Input**

       - Collection of structure files  (PDB, GRO).
       - Collection of trajectory files  (DCD, XTC, TRR).
       - User selection of the group of atoms for the RMSD calculation, the start and end frames of the trajectory, and a frame step for the calculation.

_____


.. class:: infomark

**Output**

The output consists of a three-dimensional numpy array saved in JSON format, with dimensions N x N x t, where N is the number of trajectories and t is the number of time frames. Thus, the file effectively contains multiple distance matrices (one for each time step) representing the RMSD between all pairs of trajectories for a chosen group of atoms.

It may be more useful to flatten the tensor to a two-dimensional matrix by averaging or slicing on the time axis; this can be achieved using the 'Hierarchical clustering' tool.

    ]]></help>
    <expand macro="citations" />
</tool>