comparison preprocess.xml @ 0:4d539083cf7f draft

planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233-dirty
author sblanck
date Tue, 12 May 2020 10:40:36 -0400
parents
children 3fcbb8030fcc
comparison
equal deleted inserted replaced
-1:000000000000 0:4d539083cf7f
1 <tool id="preprocess" name="Data Normalization" force_history_refresh="True" version="1.1.0">
2 <requirements> <!-- Declares the mpagenomics package (version 1.1.2) as the only dependency of this tool. -->
3 <!--requirement type="set_environment">R_SCRIPT_PATH</requirement-->
4 <requirement type="package" version="1.1.2">mpagenomics</requirement>
5 </requirements>
6 <!--command interpreter="python"--> <!-- Builds the Rscript command line for preprocess.R. The tumorcsv dataset is declared inside the "settings" conditional, so it is referenced as $settings.tumorcsv; for a "standard" study the literal 'none' is passed instead. The input argument is a comma-separated list of "path;name" pairs, one per selected CEL dataset. -->
7 <command>
8 <![CDATA[
9 Rscript
10 ${__tool_directory__}/preprocess.R
11 --summary '$summary'
12 --new_file_path '$__new_file_path__'
13 --inputcdffull_name '$inputcdffull.name'
14 --inputufl_name '$inputufl.name'
15 --inputugp_name '$inputugp.name'
16 --inputacs_name '$inputacs.name'
17 --inputcdffull '$inputcdffull'
18 --inputufl '$inputufl'
19 --inputugp '$inputugp'
20 --inputacs '$inputacs'
21 --dataSetName '$datasetName'
22 #if $settings.settingsType == "tumor":
23 --tumorcsv '$settings.tumorcsv'
24 #end if
25 #if $settings.settingsType == "standard":
26 --tumorcsv 'none'
27 #end if
28 --settingsType '$settings.settingsType'
29 --outputgraph '$outputgraph'
30 --zipfigures '$zipfigures'
31 --outputlog '$outputlog'
32 --log '$log'
33 --user_id '$__user_id__'
34 --input "#for $input in $inputs# $input;$input.name, #end for#"
35 ]]>
36
37 </command>
38 <inputs> <!-- User-facing parameters; every name declared here is referenced from the command template. -->
39 <param name="datasetName" type="text" label="Dataset Name"/>
40 <param name="inputs" type="data" format="cel" multiple="True" label="Cel files dataset" help="Cel files dataset previously uploaded with the Multiple File Datasets tool."/>
41 <param name="inputcdffull" type="data" format="cdf" label="cdf file" help=".cdf file name must comply with the following format : &lt; chiptype &gt;,&lt; tag &gt;.cdf (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf)." />
42 <param name="inputufl" type="data" format="ufl" label="ufl file" help=".ufl file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl)."/>
43 <param name="inputugp" type="data" format="ugp" label="ugp file" help=".ugp file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ugp)."/>
44 <param name="inputacs" type="data" format="acs" label="acs file" help=".acs file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,HB20080710.acs)."/>
45 <conditional name="settings"> <!-- The "tumor" branch adds the TumorBoost csv input; the "standard" branch adds no parameter. -->
46 <param name="settingsType" type="select" label="Reference">
47 <option value="standard">Study without reference</option>
48 <option value="tumor">Normal-tumor study with TumorBoost</option>
49 </param>
50 <when value="standard" />
51 <when value="tumor">
52 <param name="tumorcsv" type="data" format="csv" label="TumorBoost csv file" help="Normal-tumor csv file. See below for more information."/>
53 </when>
54 </conditional>
55 <!--param name="outputgraph" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output figures" /-->
56 <!--param name="outputlog" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output log" /--> <!-- The two selects below carry the same TRUE/FALSE values as the commented-out booleans above; the values are passed to the script and tested by the output filters. -->
57 <param name="outputgraph" type="select" label="Output figures">
58 <option value="TRUE">Yes</option>
59 <option value="FALSE">No</option>
60 </param>
61 <param name="outputlog" type="select" label="Output log">
62 <option value="TRUE">Yes</option>
63 <option value="FALSE">No</option>
64 </param>
65 <!--param name="chipType" type="text" label="chipType" /-->
66 <!--param name="workspace" type="text" label="Workspace"/-->
67 </inputs>
68
69 <outputs> <!-- "summary" has no filter; "zipfigures" and "log" are each filtered on the matching select parameter being "TRUE". -->
70 <!-- Would like to make this hidden or not appear altogether, but
71 variable outputs require a primary dataset. If it is hidden, the
72 history refresh doesn't occur.
73 -->
74 <data format="dsf" name="summary" label="Dataset summary file of ${datasetName}" />
75 <data format="zip" name="zipfigures" label="figures of normalization of ${datasetName}">
76 <filter>outputgraph == "TRUE"</filter>
77 </data>
78 <data format="log" name="log" label="log of normalization ${datasetName}">
79 <filter>outputlog == "TRUE"</filter>
80 </data>
81 </outputs>
82
83 <stdio> <!-- Any exit code of 1 or higher is reported as a fatal error. -->
84 <exit_code range="1:" level="fatal" description="See logs for more details" />
85 </stdio>
86
87 <help>
88
89 **What it does**
90
91 This preprocessing step corrects biological and technical biases introduced by the experiment. Raw data from Affymetrix arrays are provided in different CEL files. These data must be normalized before statistical analysis.
92 The pre-processing is provided as a wrapper of aroma.* packages (using CRMAv2 and TumorBoost when appropriate). Note that this implies that the pre-processing step is only available for Affymetrix arrays.
93
94 -----
95
96 **Chip file naming conventions**
97
98 Chip filenames must strictly follow the following rules :
99
100 - *.cdf* filename must comply with the following format : &lt; chiptype &gt;,&lt; tag &gt;.cdf (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf). Note the use of a comma (not a period) between &lt;chiptype&gt; and the tag "Full".
101
102 - *.ufl* filename must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl).
103
104 - *.ugp* filename must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ugp).
105
106 - *.acs* file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,HB20080710.acs).
107
108 -----
109
110 **Normal-tumor study with TumorBoost**
111
112 In cases where normal (control) samples are matched to tumor samples, normalization can be improved using TumorBoost. In this case, a normal-tumor csv file must be provided :
113
114 - The first column contains the names of the files corresponding to normal samples of the dataset.
115
116 - The second column contains the names of the tumor samples files.
117
118 - Column names of these two columns are respectively normal and tumor.
119
120 - Columns are separated by a comma.
121
122 - *Extensions of the files (.CEL for example) should be removed*
123
124
125
126 **Example**
127
128 Suppose the dataset contains 6 .cel files (3 patients, each represented by a pair of normal and tumor .cel files) ::
129
130 patient1_normal.cel
131 patient1_tumor.cel
132 patient2_normal.cel
133 patient2_tumor.cel
134 patient3_normal.cel
135 patient3_tumor.cel
136
137
138 The csv file should look like this ::
139
140 normal,tumor
141 patient1_normal,patient1_tumor
142 patient2_normal,patient2_tumor
143 patient3_normal,patient3_tumor
144
145
146 -----
147
148 **Citation**
149
150 When using this tool, please cite :
151
152 `Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint &lt;http://fr.arxiv.org/abs/1401.5035&gt;`_
153
154 As CRMAv2 normalization is used, please also cite `H. Bengtsson, P. Wirapati, and T. P. Speed. A single-array preprocessing method for estimating full-resolution raw copy numbers from all Affymetrix genotyping arrays including GenomeWideSNP 5 &amp; 6. Bioinformatics, 25(17):2149–2156, 2009. &lt;http://bioinformatics.oxfordjournals.org/content/25/17/2149.short&gt;`_
155
156 When using TumorBoost to improve normalization in a normal-tumor study, please cite `H. Bengtsson, P. Neuvial, and T. P. Speed. TumorBoost: Normalization of allele-specific tumor copy numbers from a single pair of tumor-normal genotyping microarrays. BMC Bioinformatics, 11, 2010 &lt;http://www.biomedcentral.com/1471-2105/11/245&gt;`_
157
158 </help>
159 </tool>