<!--
  Galaxy tool wrapper "Filter nucleotides based on quality scores".
  Runs quality_filter.py on a MAF dataset and writes a MAF dataset in which
  nucleotides are masked according to the options selected below.
-->
<tool id="qualityFilter" name="Filter nucleotides" version="1.0.1">
  <description> based on quality scores</description>

  <!--
    Positional arguments handed to quality_filter.py, in order:
      1. input MAF dataset
      2. output MAF dataset
      3. primary species (multi-select)
      4. species to mask (multi-select)
      5. quality score cut-off
      6. mask character, passed as the option VALUE (0 to 5), not the character
      7. mask region code (0 to 3)
      8. neighbor length: "lengthr,lengthl" for region 3, the constant 1 for
         region 0, otherwise the single "length" value of region 1 or 2
      9. path to quality_scores.loc under the Galaxy data index directory
  -->
  <command interpreter="python">
    quality_filter.py
    $input
    $out_file1
    $primary_species
    $mask_species
    $score
    $mask_char
    ${mask_region.region}
    #if $mask_region.region == "3"
      ${mask_region.lengthr},${mask_region.lengthl}
    #elif $mask_region.region == "0"
      1
    #else
      ${mask_region.length}
    #end if
    ${GALAXY_DATA_INDEX_DIR}/quality_scores.loc
  </command>

  <inputs>
    <param format="maf" name="input" type="data" label="Select data"/>

    <!-- Both species lists are filled from the "species" metadata of the selected input. -->
    <param name="primary_species" type="select" label="Use quality scores of" display="checkboxes" multiple="true">
      <options>
        <filter type="data_meta" ref="input" key="species" />
      </options>
    </param>
    <param name="mask_species" type="select" label="Mask Species" display="checkboxes" multiple="true">
      <options>
        <filter type="data_meta" ref="input" key="species" />
      </options>
    </param>

    <!--
      NOTE(review): this help text says "less than or equal to" the cut-off, while the
      worked example in the help section says "less than 20". Confirm the comparison
      used by quality_filter.py and align the two statements.
    -->
    <param name="score" size="10" type="integer" value="20" label="Quality score cut-off" help="Cut-off value of 20 means mask all nucleotides having quality score less than or equal to 20"/>

    <!-- The command line receives the numeric option value, not the displayed character. -->
    <param name="mask_char" size="5" type="select" label="Mask character">
      <option value="0" selected="true">#</option>
      <option value="1">$</option>
      <option value="2">^</option>
      <option value="3">*</option>
      <option value="4">?</option>
      <option value="5">N</option>
    </param>

    <!--
      The selected region decides which length parameters exist:
      none for 0, "length" for 1 and 2, "lengthr" plus "lengthl" for 3.
      The command template above branches on the same values.
    -->
    <conditional name="mask_region">
      <param name="region" type="select" label="Mask region">
        <option value="0" selected="true">Only the corresponding nucleotide </option>
        <option value="1">Corresponding column + right-side neighbors</option>
        <option value="2">Corresponding column + left-side neighbors</option>
        <option value="3">Corresponding column + neighbors on both sides</option>
      </param>
      <when value="0">
      </when>
      <when value="1">
        <param name="length" size="10" type="integer" value="2" label="Number of right-side neighbors"/>
      </when>
      <when value="2">
        <param name="length" size="10" type="integer" value="2" label="Number of left-side neighbors"/>
      </when>
      <when value="3">
        <param name="lengthr" size="10" type="integer" value="2" label="Number of neighbors on right-side" />
        <param name="lengthl" size="10" type="integer" value="2" label="Number of neighbors on left-side" />
      </when>
    </conditional>
  </inputs>

  <outputs>
    <!-- The output is MAF and takes its metadata from the input dataset. -->
    <data format="maf" name="out_file1" metadata_source="input"/>
  </outputs>

  <requirements>
    <requirement type="python-module">numpy</requirement>
  </requirements>

  <tests>
    <!-- Masks hg18 using panTro2 scores at cut-off 50, "#" mask (value 0), single-nucleotide region. -->
    <test>
      <param name="input" value="6.maf"/>
      <param name="primary_species" value="panTro2"/>
      <param name="mask_species" value="hg18"/>
      <param name="score" value="50"/>
      <param name="mask_char" value="0"/>
      <param name="region" value="0" />
      <output name="out_file1" file="6_quality_filter.maf"/>
    </test>
  </tests>

  <help>

.. class:: infomark

**What it does**

This tool takes a MAF file as input and filters nucleotides in every alignment block of the MAF file based on their quality/PHRED scores.

-----

.. class:: warningmark

**Note**

Any block/s not containing the primary species (species whose quality scores is to be used), will be omitted.
Also, any primary species whose quality scores are not available in Galaxy will be considered as a non-primary species. This info will appear as a message in the job history panel.

-----

**Example**

- For the following alignment block::

    a score=4050.0
    s hg18.chrX    3719221 48 - 154913754 tattttacatttaaaataaatatgtaaatatatattttatatttaaaa
    s panTro2.chrX 3560945 48 - 155361357 tattttatatttaaaataaagatgtaaatatatattttatatttaaaa

- running this tool with **Primary species as panTro2**, **Mask species as hg18, panTro2**, **Quality cutoff as 20**, **Mask character as #** and **Mask region as only the corresponding position** will return::

    a score=4050.0
    s hg18.chrX    3719221 48 - 154913754 ###tttac#####a###a#atatgtaaat###tattt#####ttaaaa
    s panTro2.chrX 3560945 48 - 155361357 ###tttat#####a###a#agatgtaaat###tattt#####ttaaaa

where, the positions containing # represent panTro2 nucleotides having quality scores less than 20.
  </help>
</tool>