view checkformat_config.xml @ 5:e7c5811ec12f draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/checkformat.git commit 6b7aa0db6cebdb8acfac0a07291647947260a310
author ethevenot
date Thu, 01 Mar 2018 05:07:38 -0500
parents 9590fac86f63
children
line wrap: on
line source

<tool id="checkFormat" name="Check Format" version="3.0.0">
  <description>Checking/formatting the sample and variable names of the dataMatrix, sampleMetadata, and variableMetadata files</description>
  
  <!-- Software dependency needed to run the wrapper: the r-batch package, pinned to version 1.1_4. -->
  <requirements>
    <requirement type="package" version="1.1_4">r-batch</requirement>
  </requirements>

  <!-- Error detection: any exit status of 1 or above is reported as a fatal job error. -->
  <stdio>
    <exit_code level="fatal" range="1:"/>
  </stdio>
  
  <!--
    Runs checkformat_wrapper.R from the tool directory with Rscript.
    Arguments are passed as alternating name/value pairs: the three input
    tables, the makeNameL switch, then the four output paths.
    Every substituted value (including the script path) is single-quoted so
    that the shell neither splits it on whitespace nor expands any character
    it contains.
  -->
  <command><![CDATA[
    Rscript '$__tool_directory__/checkformat_wrapper.R'
    dataMatrix_in '$dataMatrix_in'
    sampleMetadata_in '$sampleMetadata_in'
    variableMetadata_in '$variableMetadata_in'
    makeNameL '$makeNameL'

    dataMatrix_out '$dataMatrix_out'
    sampleMetadata_out '$sampleMetadata_out'
    variableMetadata_out '$variableMetadata_out'
    information '$information'
  ]]></command>
  
  <!--
    User-facing parameters: three tabular input datasets and one yes/no switch.
    The select values TRUE/FALSE are forwarded verbatim to the wrapper as the
    makeNameL argument; 'no' (FALSE) is the default.
    Each parameter carries a short inline help text summarising the
    corresponding entry of the tool's help section.
  -->
  <inputs>
    <param name="dataMatrix_in"
           type="data"
           format="tabular"
           label="Data matrix file"
           help="Variable x sample table of numeric values ('.' as decimal, NA for missing values); no metadata apart from row and column names" />
    <param name="sampleMetadata_in"
           type="data"
           format="tabular"
           label="Sample metadata file"
           help="Sample x metadata table of numeric and/or character values ('.' as decimal, NA for missing values)" />
    <param name="variableMetadata_in"
           type="data"
           format="tabular"
           label="Variable metadata file"
           help="Variable x metadata table of numeric and/or character values ('.' as decimal, NA for missing values)" />
    <param name="makeNameL"
           type="select"
           label="Make syntactically valid sample and variable names"
           help="If 'yes', names are converted with the R 'make.names' function when required (e.g. an 'X' is added to names starting with a digit, blanks are converted to '.')">
      <option value="TRUE">yes</option>
      <option value="FALSE" selected="true">no</option>
    </param>
  </inputs>
  
  <!--
    Output datasets: the three tables (tabular) and a text report.
    Table labels are built from the tool name and the name of the matching
    input dataset; the report label is built from the tool name only.
  -->
  <outputs>
    <data name="dataMatrix_out" format="tabular" label="${tool.name}_${dataMatrix_in.name}"/>
    <data name="sampleMetadata_out" format="tabular" label="${tool.name}_${sampleMetadata_in.name}"/>
    <data name="variableMetadata_out" format="tabular" label="${tool.name}_${variableMetadata_in.name}"/>
    <data name="information" format="txt" label="${tool.name}_information.txt"/>
  </outputs>
  
  <!--
    Functional test: runs the tool on the three input-*.tsv test files with
    name conversion switched on (makeNameL = TRUE) and checks that the
    'information' output contains both expected messages.
  -->
  <tests>
    <test>
      <param name="dataMatrix_in" value="input-dataMatrix.tsv" />
      <param name="sampleMetadata_in" value="input-sampleMetadata.tsv" />
      <param name="variableMetadata_in" value="input-variableMetadata.tsv" />
      <param name="makeNameL" value="TRUE" />
      <output name="information">
        <assert_contents>
          <has_text text="Message: Converting sample and variable names to the standard R format" />
          <has_text text="Warning: The sample and/or variable names or orders from the input tables have been modified" />
        </assert_contents>
      </output>
    </test>
  </tests>
  
  <help><![CDATA[
    
.. class:: infomark
    
**Author** Etienne Thevenot (W4M Core Development Team, MetaboHUB Paris, CEA)
    
---------------------------------------------------
    
.. class:: infomark
    
**Tool updates**
    
See the **NEWS** section at the bottom of this page
    
---------------------------------------------------
    
============
Check Format
============
    
-----------
Description
-----------
    
| **Checks the format (row and column names)** of the dataMatrix, sampleMetadata and variableMetadata tables; if the order of the samples and/or variables differs between (some of) the tables, the **orders from the dataMatrix are permuted** to match those of the sampleMetadata and/or the variableMetadata; sample and variable names can also be made **syntactically valid** for R by selecting the corresponding argument (e.g. an 'X' is added to names starting with a digit, blanks are converted to '.', etc.).
    
    
-----------------
Workflow position
-----------------
    
.. image:: ./static/images/checkFormat_workflowPositionImage.png

    
-----------
Input files
-----------
 
+----------------------------+---------+
| Parameter : num + label    |  Format |
+============================+=========+
| 1 : Data matrix file       | tabular |
+----------------------------+---------+
| 2 : Sample metadata file   | tabular |
+----------------------------+---------+
| 3 : Variable metadata file | tabular |
+----------------------------+---------+

| The **required formats** for the dataMatrix, sampleMetadata, and variableMetadata files are described in the **HowTo** entitled 'Format Data For Postprocessing' available on the main page of Workflow4Metabolomics.org (http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToFormatDataForPostprocessing_v02.pdf)


----------
Parameters
----------
	  
Data matrix file
	| variable x sample **dataMatrix** tab-separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row (variable) and column (sample) names must be identical to the row names of the variable and sample metadata, respectively (see below)
	|

Sample metadata file
	| sample x metadata **sampleMetadata** tab-separated file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
	|
	
Variable metadata file
	| variable x metadata **variableMetadata** tab-separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
	|

Make syntactically valid sample and variable names
	| if set to 'yes', sample and variable names will be converted to syntactically valid names with the 'make.names' R function when required (e.g. an 'X' is added to names starting with a digit, blanks will be converted to '.', etc.)
	|
  
------------
Output files
------------

dataMatrix_out.tabular
	| dataMatrix data file; identical to the input dataMatrix if neither renaming of sample/variable names nor re-ordering of samples/variables was required (see the 'information' file for the presence/absence of modifications)
	|

sampleMetadata_out.tabular
	| sampleMetadata data file; identical to the input sampleMetadata if neither renaming of sample names nor re-ordering of samples was required (see the 'information' file for the presence/absence of modifications)
	|

variableMetadata_out.tabular
	| variableMetadata data file; identical to the input variableMetadata if neither renaming of variable names nor re-ordering of variables was required (see the 'information' file for the presence/absence of modifications)
	|

information.txt
	| Text file with all messages when error(s) in formats are detected
	|
    
---------------------------------------------------
    
---------------
Working example
---------------
    
.. class:: infomark
  
See the **W4M00001a_sacurine-subset-statistics**, **W4M00001b_sacurine-complete**, **W4M00002_mtbls2**, or **W4M00003_diaplasma** shared histories in the **Shared Data/Published Histories** menu (https://galaxy.workflow4metabolomics.org/history/list_published)
    
---------------------------------------------------
    
----
NEWS
----

CHANGES IN VERSION 3.0.0
========================

NEW FEATURES

Automated re-ordering (if necessary) of sample and/or variable names from dataMatrix based on sampleMetadata and variableMetadata

New argument to make sample and variable names syntactically valid

Output of dataMatrix, sampleMetadata, and variableMetadata files, whether they have been modified or not

CHANGES IN VERSION 2.0.4
========================

INTERNAL MODIFICATIONS

Minor internal modifications
    
CHANGES IN VERSION 2.0.2
========================
    
INTERNAL MODIFICATIONS
    
| Test for R code
| Planemo running validation
| Planemo installing validation
| Travis automated testing
| Toolshed export
    
  ]]></help>
  
  <!-- Publications to cite when using this tool, each identified by its DOI. -->
  <citations>
    <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
    <citation type="doi">10.1016/j.biocel.2017.07.002</citation>
    <citation type="doi">10.1093/bioinformatics/btu813</citation>
  </citations>


</tool>