view selection.xml @ 2:b0346f5e9e45 draft

planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 7679c75b4a95df45311603e2dceda55b159ffb4b-dirty
author sblanck
date Fri, 29 May 2020 10:48:28 -0400
parents 4d539083cf7f
children
line wrap: on
line source

<tool id="selection" name="Markers selection" force_history_refresh="True" version="0.1.0">
  <!--
    Command template: runs selection.R (located in the tool directory) and
    passes every tool parameter as a named flag. Branch-specific flags are
    emitted by the Cheetah #if blocks below. Parameters declared inside a
    <conditional> are referenced through the conditional's name
    (e.g. $settings.tumorcsv) so the lookup is explicit.
  -->
  <command>
    <![CDATA[
        Rscript
        ${__tool_directory__}/selection.R
        --input '$input'
        --response '$response'
        --chrom '$chromosome'
        --new_file_path '$__new_file_path__'
        --settingsSignal '$settingsSNP.signal'
        ## The probe selector only exists when the CN signal is chosen;
        ## for fracB the placeholder 'none' is passed instead.
        #if $settingsSNP.signal == "CN":
            --settingsSnp '$settingsSNP.snp'
        #end if
        #if $settingsSNP.signal == "fracB":
            --settingsSnp 'none'
        #end if
        --settingsType '$settings.settingsType'
        ## NOTE(review): the flag name 'settingsType' is emitted a second time below,
        ## carrying the normal-tumor csv path (or 'none'). The flag names are kept
        ## unchanged here; confirm against selection.R that repeating this flag is
        ## intended and that a dedicated flag is not expected instead.
        #if $settings.settingsType == "tumor":
            --settingsType '$settings.tumorcsv'
        #end if
        #if $settings.settingsType == "standard":
            --settingsType 'none'
        #end if
        --folds '$folds'
        --settingsLoss '$settingsLoss.loss'
        --outputgraph '$outputgraph'
        --output '$output'
        --pdffigures '$pdffigures'
        --outputlog '$outputlog'
        --log '$log'
        --userId '$__user_id__'
        ## The package selector is only offered for the linear response type;
        ## the logistic case always passes HDPenReg.
        #if $settingsLoss.loss == "linear":
            --settingsPackage '$settingsLoss.package'
        #end if
        #if $settingsLoss.loss == "logistic":
            --settingsPackage 'HDPenReg'
        #end if
    ]]>
  </command>
  <inputs>
    <!-- Dataset summary produced by the Data normalization tool. -->
    <param name="input" type="data" format="dsf" label="Dataset summary file" help="Summary text file generated by the Data normalization tool"/>

    <!-- csv file giving the response value associated with each data file. -->
    <param name="response" type="data" format="csv" label="Data response" help="Data response csv file. See below for more information on file format"/>

    <!-- Chromosomes to analyse; several entries can be selected at once. -->
    <param name="chromosome" type="select" size="6" multiple="true" label="Chromosomes">
      <option value="1">chr 1</option>
      <option value="2">chr 2</option>
      <option value="3">chr 3</option>
      <option value="4">chr 4</option>
      <option value="5">chr 5</option>
      <option value="6">chr 6</option>
      <option value="7">chr 7</option>
      <option value="8">chr 8</option>
      <option value="9">chr 9</option>
      <option value="10">chr 10</option>
      <option value="11">chr 11</option>
      <option value="12">chr 12</option>
      <option value="13">chr 13</option>
      <option value="14">chr 14</option>
      <option value="15">chr 15</option>
      <option value="16">chr 16</option>
      <option value="17">chr 17</option>
      <option value="18">chr 18</option>
      <option value="19">chr 19</option>
      <option value="20">chr 20</option>
      <option value="21">chr 21</option>
      <option value="22">chr 22</option>
      <option value="23">chr 23</option>
      <option value="24">chr 24</option>
      <option value="25">chr 25</option>
    </param>

    <!-- Signal type; the probe selector is only shown for the CN signal. -->
    <conditional name="settingsSNP">
      <param name="signal" type="select" multiple="false" label="Signal you want to work on">
        <option value="CN">CN</option>
        <option value="fracB">fracB</option>
      </param>
      <when value="fracB"/>
      <when value="CN">
        <param name="snp" type="select" label="Select Probes">
          <option value="FALSE">CN and SNP probes</option>
          <option value="TRUE">Only SNP probes</option>
        </param>
      </when>
    </conditional>

    <!-- Reference mode; a normal-tumor study additionally asks for a pairing csv file. -->
    <conditional name="settings">
      <param name="settingsType" type="select" label="Reference" help="">
        <option value="standard">Study without reference</option>
        <option value="tumor">Normal-tumor study</option>
      </param>
      <when value="standard"/>
      <when value="tumor">
        <param name="tumorcsv" type="data" format="csv" label="tumor boost csv file" help="Normal-tumor csv file. See below for more information."/>
      </when>
    </conditional>

    <!-- Cross-validation fold count (integer, at least 1, default 10). -->
    <param name="folds" type="integer" min="1" value="10" label="Number of folds for cross validation" help="Integer between 1 and number of file in the .cel file dataset"/>

    <!-- Response type; the method selector is only offered for the linear case. -->
    <conditional name="settingsLoss">
      <param name="loss" type="select" multiple="false" label="Response type">
        <option value="linear">Linear</option>
        <option value="logistic">Logistic</option>
      </param>
      <when value="logistic"/>
      <when value="linear">
        <param name="package" type="select" multiple="false" label="Method" help="Either “HDPenReg” or “spikeslab”. Used package in linear case">
          <option value="HDPenReg">HDPenReg</option>
          <option value="spikeslab">spikeslab</option>
        </param>
      </when>
    </conditional>

    <!-- Toggles for the optional outputs (figures pdf and log file). -->
    <param name="outputgraph" type="select" multiple="false" label="Plot figures">
      <option value="TRUE">Yes</option>
      <option value="FALSE">No</option>
    </param>
    <param name="outputlog" type="select" label="Output log">
      <option value="TRUE">Yes</option>
      <option value="FALSE">No</option>
    </param>
  </inputs>
  <!--
    Datasets produced by a run. "output" is always created; the figures pdf
    and the log are only created when their filters evaluate to true.
  -->
  <outputs>
    <data format="tabular" name="output" label="selection of ${input.name}"/>
    <data format="pdf" name="pdffigures" label="figures of SNPs selection of ${input.name}">
      <!-- Figures must have been requested by the user. -->
      <filter>outputgraph == "TRUE"</filter>
      <!--
        No figures for the spikeslab method. The "package" parameter only
        exists in the linear branch of settingsLoss, so the response type is
        checked first to avoid looking up a key that is absent for logistic.
      -->
      <filter>settingsLoss['loss'] != 'linear' or settingsLoss['package'] != 'spikeslab'</filter>
    </data>
    <data format="log" name="log" label="log of SNPs selection of ${input.name}">
      <!-- The log dataset is only kept when the user asked for it. -->
      <filter>outputlog == "TRUE"</filter>
    </data>
  </outputs>
  <!-- Exit codes of 1 or greater mark the job as failed (fatal). -->
  <stdio>
    <exit_code range="1:" level="fatal" description="See logs for more details"/>
  </stdio>
  <help>
.. class:: warningmark

Data normalization must be run with the Data Normalization tool prior to SNPs selection. Otherwise, the standalone version can be used to perform marker selection from matrices containing data normalized with tools different from the one proposed in this instance.  

-----
   	
**What it does**
   	    	
This tool selects some relevant markers according to a response using penalized regressions.

Output:
  	
A tabular text file containing 5 columns which describe all the selected SNPs (1 line per SNP):
	
	- chr: Chromosome containing the selected SNP.
  	- position: Position of the selected SNP.
	- index: Index of the selected SNP.
	- names: Name of the selected SNP.
	- coefficient: Regression coefficient of the selected SNP.

-----

**Data Response csv file**
     	
Data response csv file format:
	
	- The first column contains the names of the different files of the data-set.
     	 
	- The second column contains the response associated with each file. 
     	
	- Column names of these two columns are respectively files and response.

	- Columns are separated by a comma
     	
	- *Extensions of the files (.CEL for example) should be removed*


     	
**Example** 

Suppose the studied dataset contains 3 .cel files ::
     	
     	patient1.cel
     	patient2.cel
     	patient3.cel 
     	
The csv file should look like this ::
     	
     	files,response
     	patient1,1.92145
     	patient2,2.12481
     	patient3,1.23545


-----
  	
**Normal-tumor study**
     	
When normal (control) samples are matched to tumor samples, they are used as references to extract the copy number profile. In this case, a normal-tumor csv file must be provided:

	- The first column contains the names of the files corresponding to normal samples of the dataset.
     	 
	- The second column contains the names of the tumor samples files. 
     	
	- Column names of these two columns are respectively normal and tumor.
     	
	- Columns are separated by a comma.
     	
	- *Extensions of the files (.CEL for example) should be removed*

     	
**Example** 

Suppose the studied dataset contains 6 .cel files (3 patients, each represented by a pair of normal and tumor .cel files) ::
     	
     	patient1_normal.cel
     	patient1_tumor.cel
     	patient2_normal.cel
     	patient2_tumor.cel
     	patient3_normal.cel 
     	patient3_tumor.cel
      	

The csv file should look like this ::
     	
     	normal,tumor
     	patient1_normal,patient1_tumor
     	patient2_normal,patient2_tumor
     	patient3_normal,patient3_tumor

-----     	  		


   	
**Citation**
		
If you use this tool please cite : 

`Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint &lt;http://fr.arxiv.org/abs/1401.5035&gt;`_
 
 </help>
</tool>