comparison deletion_predictor.xml @ 0:aa82b2e54055 draft

planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit b36048cd608ede0ec6f6559648525c9350caae34-dirty
author wolma
date Sat, 11 Nov 2017 18:19:22 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:aa82b2e54055
1 <tool id="mimodd_delcall" name="MiModD Deletion Calling (for PE data)"
2 version="@MIMODD_WRAPPER_VERSION@">
3 <description>
4 predicts deletions in one or more aligned paired-end read samples based on coverage of the reference genome and on insert sizes
5 </description>
6 <macros> <!-- Imports shared wrapper definitions; the macros expanded in this tool and the @...@ tokens are presumably defined in macros.xml (not visible here). -->
7 <import>macros.xml</import>
8 </macros>
9 <expand macro="requirements" />
10 <expand macro="stdio" />
11 <expand macro="version_command" />
12 <command><![CDATA[
13 mimodd delcall
14 #for $aln in $list_input
15 '${aln}'
16 #end for
17 '${covfile}' -o '${ofile}'
18 --index-files
19 #for $aln in $list_input
20 '${aln.metadata.bam_index}'
21 #end for
22 --max-cov ${max_cov} --min-size ${min_size}
23 ${include_uncovered}
24 ${group_by_id}
25 --verbose
26 ]]></command> <!-- Argument order: every BAM input, the BCF coverage file, the output path, then one BAM index per input (same order as the BAM list), followed by the numeric thresholds and the two optional boolean flags. -->
27
28 <inputs> <!-- User-facing parameters; each name is referenced by the Cheetah command template of this tool. -->
29 <param name="list_input" type="data" multiple="true" format="bam"
30 label="Aligned reads input dataset(s)" />
31 <param name="covfile" type="data" format="bcf"
32 label="BCF variant call dataset to extract coverage from"
33 help="Use the MiModD Variant Calling Tool to generate this file."/>
34 <param name="group_by_id" type="boolean" truevalue="-i" falsevalue="" checked="false"
35 label="group reads based on read group id only"
36 help="If selected, reads from different read groups will be treated strictly separate. If turned off, read groups with identical sample names are used together for identifying uncovered regions, but are still treated separately for the prediction of deletions." /> <!-- Renders as -i when checked, as an empty string otherwise; off by default. -->
37 <param name="include_uncovered" type="boolean" truevalue="-u" falsevalue="" checked="false"
38 label="include low-coverage regions"
39 help="If selected, regions that fulfill the coverage criteria below, but are not statistically significant deletions, will be included in the output." /> <!-- Renders as -u when checked, as an empty string otherwise; off by default. -->
40 <param name="max_cov" type="integer" value="0" min="0"
41 label="maximal coverage allowed inside a low-coverage region (default: 0)"
42 help="The maximal coverage at a site allowed to consider it as part of a low-coverage region" /> <!-- min="0": a coverage threshold cannot be negative, so reject such values in the form. -->
43 <param name="min_size" type="integer" value="100" min="0"
44 label="minimal deletion size (default: 100)"
45 help="A low-coverage region must consist of at least this number of consecutive bases below the maximal coverage to consider it in further analyses."/> <!-- min="0": a negative region size is meaningless, so reject such values in the form. -->
46 </inputs>
47
48 <outputs> <!-- Single GFF dataset; its path is what the command template passes after -o. -->
49 <data format="gff" name="ofile"/>
50 </outputs>
51
52 <tests>
53 <test> <!-- Runs a.bam with a.bcf, enabling include_uncovered and leaving group_by_id at its default. -->
54 <param name="list_input" value="a.bam" />
55 <param name="covfile" value="a.bcf" />
56 <param name="include_uncovered" value="true" />
57 <assert_command> <!-- The generated command line must contain the -u flag and must not contain the -i flag. -->
58 <has_text text=" -u " />
59 <not_has_text text=" -i " />
60 </assert_command>
61 <output name="ofile" ftype="gff">
62 <assert_contents> <!-- Expects 9 columns and, for each sequence name listed below, a line starting with name, MiModD, Uncovered_Region separated by tabs (&#009; is the tab character). -->
63 <has_n_columns n="9" />
64 <has_line_matching
65 expression="^chrI&#009;MiModD&#009;Uncovered_Region.+" />
66 <has_line_matching
67 expression="^chrII&#009;MiModD&#009;Uncovered_Region.+" />
68 <has_line_matching
69 expression="^chrIII&#009;MiModD&#009;Uncovered_Region.+" />
70 <has_line_matching
71 expression="^chrIV&#009;MiModD&#009;Uncovered_Region.+" />
72 <has_line_matching
73 expression="^chrV&#009;MiModD&#009;Uncovered_Region.+" />
74 <has_line_matching
75 expression="^chrX&#009;MiModD&#009;Uncovered_Region.+" />
76 <has_line_matching
77 expression="^MtDNA&#009;MiModD&#009;Uncovered_Region.+" />
78 </assert_contents>
79 </output>
80 </test>
81 </tests>
82
83 <help><![CDATA[
84 .. class:: infomark
85
86 **What it does**
87
88 The tool predicts deletions from paired-end data in a two-step process:
89
90 1) It finds regions of low coverage, i.e., candidate regions for deletions, by scanning a BCF file produced by the *Variant Calling* tool.
91
92 The *maximal coverage allowed inside a low-coverage region* and the *minimal deletion size* parameters are used at this step to define what is considered a low-coverage region.
93
94 .. class:: warningmark
95
96 The tool treats genome positions missing from the BCF input as zero coverage, so it is safe to use ONLY with BCF files produced by the *Variant Calling* tool or through other commands that keep the information for all sites.
97
98 2) It assesses every low-coverage region statistically for evidence of it being a real deletion. **This step requires paired-end data** since it relies on shifts in the distribution of read pair insert sizes around real deletions.
99
100 By default, the tool only reports Deletions, i.e., the subset of low-coverage regions that pass the statistical test.
101 If *include low-coverage regions* is selected, regions that failed the test will also be reported.
102
103 With *group reads based on read group id only* selected, grouping of reads into samples is done strictly based on their read group IDs.
104 With the option deselected, as it is by default, grouping is done based on sample names in the first step of the analysis, i.e. the reads of all read groups with a shared sample name are used to identify low-coverage regions.
105 In the second step, however, reads will be regrouped by their read group IDs again, i.e. the statistical assessment for real deletions is always done on a per read group basis.
106
107 **TIP:**
108 Leaving *group reads based on read group id only* deselected can be useful, for example, if you have both paired-end and single-end sequencing data for the same sample.
109
110 In this case, the two sets of reads will usually share a common sample name, but differ in their read groups.
111 With grouping based on sample names, the single-end data can be used together with the paired-end data to identify low-coverage regions, thus increasing overall coverage and reliability of this step.
112 Still, the assessment of deletions will use only the paired-end data (auto-detecting that the single-end reads do not provide insert size information).
113
114 @HELP_FOOTER@
115 ]]></help>
116 <expand macro="citations" /> <!-- Expands the "citations" macro, presumably defined in the imported macros.xml. -->
117 </tool>