comparison immuneml_create_dataset.xml @ 0:629e7e403e19 draft

"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
author immuneml
date Thu, 01 Jul 2021 11:36:43 +0000
parents
children ed3932e6d616
comparison
equal deleted inserted replaced
-1:000000000000 0:629e7e403e19
1 <tool id="immune_ml_dataset" name="Create dataset" version="@VERSION@.0">
2 <description></description>
3 <macros>
4 <import>prod_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
<command><![CDATA[
    ## Stage every selected data file in the working directory under its
    ## element identifier, so files can be found by their original names.
    ## Paths and identifiers are single-quoted so the shell does not expand
    ## anything contained in a user-supplied dataset name.
    #for $input in $interface_cond.data_input
        #if $input
            if [ -e './${input.element_identifier}' ]; then
                printf "File '%s' already exists in the input folder, skipping.\n" '$input.element_identifier';
            else
                ln -s '$input' '$input.element_identifier';
            fi &&
        #end if
    #end for

    #if $interface_cond.interface == "simple"
        ## Simplified interface: generate specs.yaml from the form values.
        python3 '$__tool_directory__/build_dataset_yaml_wrapper.py'
            --output_path . --file_name specs.yaml
        #if $interface_cond.dataset_cond.dataset_type == "repertoire"
            --is_repertoire True
            --format '$interface_cond.dataset_cond.metadata_cond.data_format'
            #if $interface_cond.dataset_cond.metadata_cond.data_format != "IReceptor"
                ## The IReceptor case has no metadata_input parameter, so the
                ## metadata file is only passed and copied for other formats.
                --metadata_file '$interface_cond.dataset_cond.metadata_cond.metadata_input' &&
                cp '$interface_cond.dataset_cond.metadata_cond.metadata_input' '$interface_cond.dataset_cond.metadata_cond.metadata_input.element_identifier'
            #end if
        #else
            --is_repertoire False
            --format '$interface_cond.dataset_cond.data_format'
            --metadata_columns '$interface_cond.dataset_cond.metadata_columns'
            #if $interface_cond.dataset_cond.dataset_type == "sequence"
                --paired False
            #elif $interface_cond.dataset_cond.dataset_type == "receptor"
                --paired True
                --receptor_chains '$interface_cond.dataset_cond.receptor_type'
            #end if
        #end if
        && mv ./specs.yaml create_dataset.yaml &&
    #else
        ## Advanced interface: use the user-provided YAML specification.
        ## yaml_input lives inside interface_cond, so it is referenced by its
        ## fully qualified name.
        cp '$interface_cond.yaml_input' create_dataset.yaml &&
    #end if

    immune-ml ./create_dataset.yaml '${html_outfile.files_path}' --tool DatasetGenerationTool &&

    ## Expose the generated index.html as the main output file and keep the
    ## YAML specification that was used as a second output.
    mv '${html_outfile.files_path}/index.html' '$html_outfile' &&
    mv ./create_dataset.yaml '$specs'
]]></command>
<inputs>
    <!-- Top-level switch between the form-based interface and a user-supplied YAML specification. -->
    <conditional name="interface_cond">
        <param type="select" name="interface" label="Which interface would you like to use?" display="radio">
            <option value="simple">Simplified (limited options)</option>
            <option value="advanced">Advanced (full control through YAML) </option>
        </param>
        <when value="simple">
            <conditional name="dataset_cond">
                <param type="select" name="dataset_type" label="Dataset type" display="radio" help="Repertoire datasets should be used when making predictions per repertoire, such as predicting a disease state. Sequence or receptor datasets should be used when predicting values for unpaired (single-chain) and paired immune receptors respectively, like antigen specificity.">
                    <option value="repertoire">Repertoire dataset</option>
                    <option value="sequence">Sequence dataset (single chain)</option>
                    <option value="receptor">Receptor dataset (paired chains)</option>
                </param>
                <when value="repertoire">
                    <!-- Every format except IReceptor asks for a metadata file. -->
                    <conditional name="metadata_cond">
                        <param type="select" name="data_format" label="Data format" display="radio">
                            <option value="AIRR">AIRR</option>
                            <option value="IReceptor">iReceptor Gateway</option>
                            <option value="ImmunoSEQRearrangement">immunoSEQ: rearrangement-level files</option>
                            <option value="ImmunoSEQSample">immunoSEQ: sample-level files</option>
                            <option value="MiXCR">MiXCR</option>
                            <option value="VDJdb">VDJdb</option>
                            <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option>
                        </param>
                        <when value="AIRR">
                            <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire AIRR file described in the metadata file must be selected under 'Data files'."/>
                        </when>
                        <!-- Explicit empty case: the command section passes no metadata file for IReceptor. -->
                        <when value="IReceptor"/>
                        <when value="ImmunoSEQRearrangement">
                            <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire immunoSEQ rearrangement file described in the metadata file must be selected under 'Data files'."/>
                        </when>
                        <when value="ImmunoSEQSample">
                            <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire immunoSEQ sample file described in the metadata file must be selected under 'Data files'."/>
                        </when>
                        <when value="MiXCR">
                            <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire MiXCR file described in the metadata file must be selected under 'Data files'."/>
                        </when>
                        <when value="VDJdb">
                            <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire VDJdb file described in the metadata file must be selected under 'Data files'."/>
                        </when>
                        <when value="TenxGenomics">
                            <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire 10x Genomics file described in the metadata file must be selected under 'Data files'."/>
                        </when>
                    </conditional>
                </when>
                <when value="sequence">
                    <param type="select" name="data_format" label="Data format" display="radio">
                        <option value="AIRR">AIRR</option>
                        <option value="IReceptor">iReceptor Gateway</option>
                        <option value="ImmunoSEQRearrangement">immunoSEQ: rearrangement-level files</option>
                        <option value="ImmunoSEQSample">immunoSEQ: sample-level files</option>
                        <option value="MiXCR">MiXCR</option>
                        <option value="VDJdb">VDJdb</option>
                        <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option>
                    </param>
                    <param type="text" name="metadata_columns" optional="false" label="Metadata columns" help="Please specify the names of the columns that contain metadata. The metadata columns specified here can be used as labels for prediction. Multiple metadata columns may be specified and separated by comma, for example: Epitope,Epitope gene,Epitope species"/>
                </when>
                <when value="receptor">
                    <param type="select" name="data_format" label="Data format" display="radio">
                        <option value="AIRR">AIRR</option>
                        <option value="IReceptor">iReceptor Gateway</option>
                        <option value="VDJdb">VDJdb</option>
                        <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option>
                    </param>
                    <param type="select" name="receptor_type" label="Receptor type" display="radio">
                        <option value="TRA_TRB">T cell alpha/beta</option>
                        <option value="TRG_TRD">T cell gamma/delta</option>
                        <option value="IGH_IGL">B cell heavy/light</option>
                        <option value="IGH_IGK">B cell heavy/kappa</option>
                    </param>
                    <param type="text" name="metadata_columns" optional="false" label="Metadata columns" help="Please specify the names of the columns that contain metadata. The metadata columns specified here can be used as labels for prediction. Multiple metadata columns may be specified and separated by comma, for example: Epitope,Epitope gene,Epitope species"/>
                </when>
            </conditional>
            <param name="data_input" type="data" multiple="true" label="Data files" min="1" max="2000" help="This field should include individual repertoire or receptor files, or iReceptor zip files. Multiple files can be selected by holding down the control/command or shift key, or by clicking 'browse datasets' (folder button on the right). Important: make sure all the files you want to include in the dataset are highlighted in blue or gray."/>
        </when>
        <when value="advanced">
            <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/>
            <param name="data_input" type="data" multiple="true" label="Data and metadata files" optional="true" help="This field should include individual repertoire or receptor files, or iReceptor zip files, and optionally a metadata file. Multiple files can be selected by holding down the control/command or shift key, or by clicking 'browse datasets' (folder button on the right). Important: make sure all the files you want to include in the dataset are highlighted."/>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- The YAML specification that was run; the command section moves create_dataset.yaml here. -->
    <data name="specs" format="txt" label="create_dataset.yaml"/>
    <!-- Receives index.html from the files_path directory that is passed to immune-ml as its output location. -->
    <data name="html_outfile" format="iml_dataset" label="ImmuneML dataset"/>
</outputs>
142
143
144 <help><![CDATA[
145
146 In Galaxy, an immuneML dataset is simply a Galaxy collection containing all relevant files (including an optional metadata file).
147 The Create dataset Galaxy tool allows users to import data from various formats and create immuneML datasets in Galaxy.
148 These datasets are in an optimized binary (Pickle) format, which ensures that you can quickly import the dataset into
149 Galaxy tools without having to repeatedly specify the import parameters.
150
151 Before creating a dataset, the relevant data files must first be uploaded to the Galaxy interface. This can be done either
152 by uploading files from your local computer (use the 'Upload file' tool under the 'Get local data' menu), or by fetching
153 remote data from the iReceptor Plus Gateway or VDJdb (see `How to import remote AIRR datasets in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_import_remote_data.html>`_).
154
155 The imported immuneML dataset is stored in a Galaxy collection, which will appear as a history item on the right side of the screen,
156 and can later be selected as input to other tools.
157
158 The tool has a simplified and an advanced interface. The simplified interface is fully button-based, and relies
159 on default settings for importing datasets. The advanced interface gives full control over import settings through a YAML
160 specification. In most cases, the simplified interface will suffice.
161
162 For the exhaustive documentation of this tool and more information about immuneML datasets, see the tutorial `How to make an immuneML dataset in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_dataset.html>`_.
163
164 **Tool output**
165
166 This Galaxy tool will produce the following history elements:
167
168 - ImmuneML dataset: a sequence, receptor or repertoire dataset which can be used as an input to other immuneML tools. The history element contains a summary HTML page describing general characteristics of the dataset, including the name of the dataset
169 (which is used in the dataset definition of a YAML specification), the dataset type and size, available labels, and a link to download the raw data files.
170
171 - create_dataset.yaml: the YAML specification file that was used by immuneML to create the dataset.
172 This file can be downloaded and altered (for example to export files in AIRR format, or use non-standard import parameters),
173 and run again using the 'Advanced' interface.
174
175 ]]>
176 </help>
177
178 </tool>