Mercurial > repos > immuneml > immuneml_tools
annotate immuneml_simulate_dataset.xml @ 0:629e7e403e19 draft
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
author | immuneml |
---|---|
date | Thu, 01 Jul 2021 11:36:43 +0000 |
parents | |
children | ed3932e6d616 |
rev | line source |
---|---|
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
1 <tool id="immuneml_simulate_dataset" name="Simulate a synthetic immune receptor or repertoire dataset" version="@VERSION@.0"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
2 <description></description> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
3 <macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
4 <import>prod_macros.xml</import> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
5 </macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
6 <expand macro="requirements" /> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
7 <command><![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
8 ## Stage the YAML specification under a fixed local name, then run the immuneML DataSimulationTool into the HTML output's files_path directory. All substituted paths are single-quoted so whitespace or shell metacharacters in them cannot split the command. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
9 cp '$yaml_input' yaml_copy && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
10 immune-ml ./yaml_copy '${html_outfile.files_path}' --tool DataSimulationTool && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
11 ## Move the generated index.html into the primary output file and the zipped immuneML output into the archive output. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
12 mv '${html_outfile.files_path}/index.html' '${html_outfile}' && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
13 mv '${html_outfile.files_path}/immuneML_output.zip' '$archive' |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
14 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
15 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
16 </command> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
17 <inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
18 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
19 </inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
20 <outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
21 <data format="zip" name="archive" label="Archive: dataset simulation"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
22 <data format="iml_dataset" name="html_outfile" label="ImmuneML dataset (simulated sequences)"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
23 </outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
24 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
25 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
26 <help><![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
27 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
28 This Galaxy tool allows you to quickly make a dummy dataset. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
29 The tool generates a SequenceDataset, ReceptorDataset or RepertoireDataset consisting of random CDR3 sequences, which could be used for benchmarking machine learning methods or encodings, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
30 or testing out other functionalities. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
31 The amino acids in the sequences are chosen from a uniform random distribution, and there is no underlying structure in the sequences. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
32 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
33 You can control: |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
34 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
35 - The number of sequences in the dataset and, in the case of a RepertoireDataset, the number of repertoires |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
36 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
37 - The length of the generated sequences |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
38 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
39 - Labels, which can be used as a target when training ML models |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
40 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
41 Note that since these labels are randomly assigned, they carry no meaning, and it is not possible to train an ML model with high classification accuracy on this data. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
42 Meaningful labels can be added using the `Simulate immune events into existing repertoire/receptor dataset <https://galaxy.immuneml.uio.no/root?tool_id=immuneml_simulation>`_ Galaxy tool. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
43 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
44 For the exhaustive documentation of this tool and an example YAML specification, see the tutorial `How to simulate an AIRR dataset in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_simulate_dataset.html>`_. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
45 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
46 **Tool output** |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
47 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
48 This Galaxy tool will produce the following history elements: |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
49 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
50 - ImmuneML dataset (simulated sequences): a sequence, receptor or repertoire dataset which can be used as an input to other immuneML tools. The history element contains a summary HTML page describing general characteristics of the dataset, including the name of the dataset |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
51 (which is used in the dataset definition of a yaml specification), the dataset type and size, available labels, and a link to download the raw data files. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
52 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
53 - Archive: dataset simulation: a .zip file containing the complete output folder as it was produced by immuneML. This folder |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
54 contains the output of the DatasetExport instruction including raw data files. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
55 Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
56 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
57 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
58 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
59 </help> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
60 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
61 </tool> |