annotate td.xml @ 0:3e56058d9552 draft default tip

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
author mheinzl
date Wed, 16 Oct 2019 04:17:59 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
1 <?xml version="1.0" encoding="UTF-8"?>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
2 <tool id="td" name="TD:" version="1.0.5">
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
3 <description>Tag distance analysis of duplex tags</description>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
4 <requirements>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
5 <requirement type="package" version="2.7">python</requirement>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
6 <requirement type="package" version="1.4.0">matplotlib</requirement>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
7 </requirements>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
8 <command>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
9 python2 '$__tool_directory__/td.py' --inputFile '$inputFile' --inputName1 '$inputFile.name' --sample_size $sampleSize --subset_tag $subsetTag --nproc $nproc $onlyDCS $rel_freq --minFS $minFS --maxFS $maxFS
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
10 $nr_above_bars --output_pdf '$output_pdf' --output_tabular '$output_tabular' --output_chimeras_tabular '$output_chimeras_tabular'
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
11 </command>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
12 <inputs>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
13 <param name="inputFile" type="data" format="tabular" label="Dataset 1: input tags" optional="false" help="Input in tabular format with the family size, tag and the direction of the strand ('ab' or 'ba') for each family."/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
14 <param name="sampleSize" type="integer" label="number of tags in the sample" value="1000" min="0" help="specifies the number of tags in one analysis. If sample size is 0, all tags of the dataset are compared against all tags."/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
15 <param name="minFS" type="integer" label="minimum family size of the tags" min="1" value="1" help="filters the tags after their family size: Families with a smaller size are skipped. Default: min. family size = 1."/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
16 <param name="maxFS" type="integer" label="max family size of the tags" min="0" value="0" help="filters the tags after their family size: Families with a larger size are skipped. If max. family size is 0, no upper bound is defined and the maximum family size in the analysis will be the maximum family size of the whole dataset. Default: max. family size = 0."/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
17 <param name="onlyDCS" type="boolean" label="only DCS in the analysis?" truevalue="" falsevalue="--only_DCS" checked="False" help="Only tags, which have a partner tag (ab and ba) in the dataset, are included in the analysis."/> <!-- NOTE(review): the only_DCS switch is emitted when this box is UNchecked (truevalue is empty), the opposite of nr_above_bars. Presumably td.py defines the switch with store_false semantics; confirm, otherwise truevalue and falsevalue are swapped. -->
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
18 <param name="rel_freq" type="boolean" label="relative frequency?" truevalue="" falsevalue="--rel_freq" checked="False" help="If True, the relative frequencies instead of the absolute values are displayed in the plots."/> <!-- NOTE(review): the rel_freq switch is emitted when this box is UNchecked (truevalue is empty). Presumably td.py treats the switch as store_false; confirm, otherwise truevalue and falsevalue are swapped. -->
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
19 <param name="subsetTag" type="integer" label="shorten tag in the analysis?" value="0" help="By this parameter an analysis with shorter tag length is simulated. If this parameter is 0 (by default), the tags with its original length are used in the analysis."/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
20 <param name="nproc" type="integer" label="number of processors" value="8" help="Number of processors used for computing."/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
21 <param name="nr_above_bars" type="boolean" label="include numbers above bars?" truevalue="--nr_above_bars" falsevalue="" checked="True" help="The absolute and relative values of the data can be included or removed from the plots. "/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
22
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
23 </inputs>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
24 <outputs>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
25 <data name="output_pdf" format="pdf" />
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
26 <data name="output_tabular" format="tabular"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
27 <data name="output_chimeras_tabular" format="tabular"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
28
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
29 </outputs>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
30 <tests>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
31 <test>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
32 <param name="inputFile" value="td_data.tab"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
33 <param name="sampleSize" value="0"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
34 <output name="output_pdf" file="td_output.pdf" lines_diff="6"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
35 <output name="output_tabular" file="td_output.tab"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
36 <output name="output_chimeras_tabular" file="td_chimeras_output.tab"/>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
37 </test>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
38 </tests>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
39 <help> <![CDATA[
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
40 **What it does**
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
41
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
42 Tags used in Duplex Sequencing (DS) are randomized barcodes, e.g. 12 base pairs long. Since each DNA fragment is labeled by two tags at each end there are theoretically 4 to the power of (12+12) unique combinations. However, the input DNA in a typical DS experiment contains only ~1,000,000 molecules creating a large tag-to-input excess (4^24
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
43 ≫ 1,000,000). Because of such excess it is highly unlikely to tag distinct input DNA molecules with highly similar barcodes.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
44
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
45 This tool calculates the number of nucleotide differences among tags, also known as `Hamming distance <https://en.wikipedia.org/wiki/Hamming_distance>`_. In this context the Hamming distance is simply the number of differences between two tags. The tool compares in a randomly selected subset of tags (default n=1000), the difference between each tag of the subset with the tags of the complete dataset. Each tag will differ by a certain number of nucleotides with the other tags; yet the tool uses the smallest difference observed with any other tag.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
46
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
47 **Input**
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
48
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
49 This tool expects a tabular file with the tags of all families, the family sizes and information about forward (ab) and reverse (ba) strands::
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
50
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
51 1 2 3
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
52 -----------------------------
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
53 1 AAAAAAAAAAAAAAAAATGGTATG ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
54 3 AAAAAAAAAAAAAATGGTATGGAC ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
55
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
56 .. class:: infomark
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
57
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
58 **How to generate the input**
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
59
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
60 The first step of the `Du Novo Analysis Pipeline <https://doi.org/10.1186/s13059-016-1039-4>`_ is the **Make Families** tool or the **Correct Barcodes** tool that produces output in this form::
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
61
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
62 1 2 3 4
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
63 ------------------------------------------------------
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
64 AAAAAAAAAAAAAAATAGCTCGAT ab read1 CGCTACGTGACTGGGTCATG
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
65 AAAAAAAAAAAAAAATAGCTCGAT ab read2 CGCTACGTGACTGGGTCATG
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
66 AAAAAAAAAAAAAAATAGCTCGAT ab read3 CGCTACGTGACTGGGTCATG
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
67 AAAAAAAAAAAAAAAAATGGTATG ba read3 CGCTACGTGACTAAAACATG
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
68
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
69 We only need columns 1 and 2. These two columns can be extracted from this dataset using the **Cut** tool::
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
70
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
71 1 2
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
72 ---------------------------
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
73 AAAAAAAAAAAAAAATAGCTCGAT ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
74 AAAAAAAAAAAAAAATAGCTCGAT ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
75 AAAAAAAAAAAAAAATAGCTCGAT ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
76 AAAAAAAAAAAAAAAAATGGTATG ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
77
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
78 Next, the tags are sorted in ascending or descending order using the **Sort** tool::
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
79
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
80 1 2
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
81 ---------------------------
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
82 AAAAAAAAAAAAAAAAATGGTATG ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
83 AAAAAAAAAAAAAAATAGCTCGAT ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
84 AAAAAAAAAAAAAAATAGCTCGAT ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
85 AAAAAAAAAAAAAAATAGCTCGAT ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
86
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
87 Finally, unique occurrences of each tag are counted. This is done using the **Unique lines** tool that adds an additional column with the counts that also represent the family size (column 1)::
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
88
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
89 1 2 3
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
90 -----------------------------
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
91 1 AAAAAAAAAAAAAAAAATGGTATG ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
92 3 AAAAAAAAAAAAAATGGTATGGAC ab
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
93
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
94 These data can now be used in this tool.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
95
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
96 **Output**
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
97
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
98 The output is one PDF file with various plots of the Tag distance, a tabular file with the summarized data of the plots and a tabular file with the chimeras. The PDF file contains several pages:
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
99
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
100 1. This first page contains a graph representing the minimum tag distance (smallest number of differences) categorized after the family sizes.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
101
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
102 2. The second page contains the same information as the first page, but plots the family size categorized by the minimum tag distance.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
103
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
104 3. The third page contains the **first step** of the **chimera analysis**, which examines the differences between the tags at both ends of a read (a/b). Chimeras can be distinguished by carrying the same tag at one end combined with multiple different tags at the other end of a read. Here, we describe the calculation of the TDs for only one tag in detail, but the process is repeated for each tag in the sample (default n=1000). First, the tool splits the tag into its upstream and downstream part (named a and b) and compares it with all other a parts of the families in the dataset. Next, the tool estimates the sequence differences (TD) among the a parts and extracts those tags with the smallest difference (TD a.min) and calculates the TD of the b part. The tags with the largest differences are extracted to estimate the maximum TD (TD b.max). The process is repeated starting with the b part instead and estimates TD a.max and TD b.min. Next, we calculate the sum of TD a.min and TD b.max.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
105
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
106 4. The fourth page contains the **second step** of the **chimera analysis**: the absolute difference (=delta TD) between the partial TDs (TD a.min & TD b.max and TD b.min & TD a.max). The partial TDs of chimeric tags are normally very different which means that multiple combinations of the same a part with different b parts is likely. But it is possible that small delta TDs occur due to a half of a tag that is identical to other halves in the data. For this purpose, the relative difference between the partial TDs is estimated in the next step.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
107
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
108 5. The fifth page contains the **third step** of the **chimera analysis**: the relative differences of the partial TDs (=relative delta TD). These are calculated as the absolute difference between TD a.min and TD b.max equal to TD delta. Since it is not known whether the absolute difference originates due to a low and a very large TD within a tag or an identical half (TD=0), the tool estimates the relative TD delta as the ratio of the difference to the sum of the partial TDs. In a chimera, it is expected that only one end of the tag contributes to the TD of the whole tag. In other words, if the same a part is observed in combination with several different b parts, then one end will have a TD = 0. Thus, the TD difference between the parts (TD a.min - TD b.max) is the same as the sum of the parts (TD a.min + TD b.max) or the ratio of the difference to the sum (relative delta TD = (TD a.min - TD b.max) / (TD a.min + TD b.max)) will equal 1 in chimeric families. The plot can be interpreted as follows:
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
109
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
110 - A low relative difference indicates that the total TD is equally distributed in the two partial TDs. This case would be expected, if all tags originate from different molecules.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
111 - A relative delta TD of 1 means that one part of the tags is identical. Since it is very unlikely that by chance two different tags have a TD of 0, the TDs in the other half are probably artificially introduced and represent chimeric families.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
112
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
113 6. The sixth page is an analysis only of **chimeric tags** (relative delta TD = 1) from step 5.
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
114
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
115 7. The last page is only generated when the parameter "only DCS in the analysis?" is set to **False (NO)**. The graph represents the **TD of the chimeric tags** that form a DCS (complementary ab and ba).
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
116
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
117 .. class:: infomark
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
118
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
119 **Note:**
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
120 Chimeras can be identical in the first or second part of the tag and can have an identical TD with multiple tags. Therefore, the second column of the output file can have multiple tag entries. The file also contains the family sizes and the direction of the read (ab, ba). The asterisks mark the identical part of the tag::
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
121
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
122 1 2
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
123 --------------------------------------------------------------------------------------------------
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
124 GAAAGGGAGG GCGCTTCACG 1 ba GCAATCGACG *GCGCTTCACG* 1 ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
125 CCCTCCCTGA GGTTCGTTAT 1 ba CGTCCTTTTC *GGTTCGTTAT* 1 ba, GCACCTCCTT *GGTTCGTTAT* 1 ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
126 ATGCTGATCT CGAATGCATA 55 ba, 59 ab AGGTGCCGCC *CGAATGCATA* 27 ba, *ATGCTGATCT* GAATGTTTAC 1 ba
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
127
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
128 **About Author**
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
129
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
130 Author: Monika Heinzl
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
131
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
132 Department: Institute of Biophysics, Johannes Kepler University Linz, Austria
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
133
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
134 Contact: monika.heinzl@edumail.at
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
135
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
136 ]]>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
137
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
138 </help>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
139 <citations>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
140 <citation type="bibtex">
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
141 @misc{duplex,
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
142 author = {},
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
143 year = {},
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
144 title = {}
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
145 }
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
146 </citation>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
147 </citations>
3e56058d9552 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff changeset
148 </tool>