0
|
1 <tool id="shrimp_color_wrapper" name="SHRiMP for Color-space" version="1.0.0">
|
|
2 <description>reads mapping against reference sequence </description>
|
|
<!-- Cheetah command template run through the python interpreter. It always passes the reference dataset, the reads dataset and the output dataset (in that order) to shrimp_color_wrapper.py. When the "param" conditional is not set to "skip", the 17 tuning values from the "full" branch are appended as positional arguments in the order written below. -->
3 <command interpreter="python">
|
|
4 #if $param.skip_or_full=="skip" #shrimp_color_wrapper.py $input_target $input_query $output1
|
|
5 #else #shrimp_color_wrapper.py $input_target $input_query $output1 $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_crossover_penalty $param.sw_full_hit_threshold $param.sw_vector_hit_threshold
|
|
6 #end if#
|
|
7 </command>
|
|
<!-- Tool form: a csfasta reads dataset, a fasta reference dataset, and the "param" conditional that switches between the "skip" preset and the "full" list of seed and Smith-Waterman settings. -->
8 <inputs>
|
|
9 <page>
|
|
10 <param name="input_query" type="data" format="csfasta" label="Align sequencing reads" help="No dataset? Read tip below"/>
|
|
11 <param name="input_target" type="data" format="fasta" label="against reference" />
|
|
12 <conditional name="param">
|
|
13 <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
|
|
14 <option value="skip">Commonly used</option>
|
|
15 <option value="full">Full Parameter List</option>
|
|
16 </param>
|
|
<!-- "skip" exposes no extra inputs; the command template then passes only the three datasets. -->
17 <when value="skip" />
|
|
<!-- The parameters below appear in the same order as the positional arguments in the command template; keep the two lists in sync when adding or reordering entries. -->
18 <when value="full">
|
|
19 <param name="spaced_seed" type="text" size="30" value="1111001111" label="Spaced Seed" />
|
|
20 <param name="seed_matches_per_window" type="integer" size="5" value="2" label="Seed Matches per Window" />
|
|
21 <param name="seed_hit_taboo_length" type="integer" size="5" value="4" label="Seed Hit Taboo Length" />
|
|
22 <param name="seed_generation_taboo_length" type="integer" size="5" value="0" label="Seed Generation Taboo Length" />
|
|
23 <param name="seed_window_length" type="float" size="10" value="115.0" label="Seed Window Length" help="in percentage"/>
|
|
24 <param name="max_hits_per_read" type="integer" size="10" value="100" label="Maximum Hits per Read" />
|
|
25 <param name="max_read_length" type="integer" size="10" value="1000" label="Maximum Read Length" />
|
|
26 <param name="kmer" type="integer" size="10" value="-1" label="Kmer Std. Deviation Limit" help="-1 as None"/>
|
|
27 <param name="sw_match_value" type="integer" size="10" value="100" label="S-W Match Value" />
|
|
28 <param name="sw_mismatch_value" type="integer" size="10" value="-150" label="S-W Mismatch Value" />
|
|
29 <param name="sw_gap_open_ref" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Reference)" />
|
|
30 <param name="sw_gap_open_query" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Query)" />
|
|
31 <param name="sw_gap_ext_ref" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Reference)" />
|
|
32 <param name="sw_gap_ext_query" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Query)" />
|
|
33 <param name="sw_crossover_penalty" type="integer" size="10" value="-140" label="S-W Crossover Penalty" />
|
|
34 <param name="sw_full_hit_threshold" type="float" size="10" value="68.0" label="S-W Full Hit Threshold" help="in percentage"/>
|
|
35 <param name="sw_vector_hit_threshold" type="float" size="10" value="60.0" label="S-W Vector Hit Threshold" help="in percentage"/>
|
|
36 </when>
|
|
37 </conditional>
|
|
38 </page>
|
|
39 </inputs>
|
|
<!-- Single output dataset, declared as tabular. The command template hands it to the wrapper script as $output1. -->
40 <outputs>
|
|
41 <data name="output1" format="tabular"/>
|
|
42 </outputs>
|
|
<!-- Declares a dependency on the rmapper-cs binary. NOTE(review): presumably this is the executable that shrimp_color_wrapper.py invokes; the script is not shown here, so confirm. -->
43 <requirements>
|
|
44 <requirement type="binary">rmapper-cs</requirement>
|
|
45 </requirements>
|
|
<!-- One functional test: uses the "skip" setting with Ssuis.fasta as the reference and shrimp_cs_test1.csfasta as the reads, and names shrimp_cs_test1.out as the expected content of output1. The "full" branch of the conditional is not exercised by any test here. -->
46 <tests>
|
|
47 <test>
|
|
48 <param name="skip_or_full" value="skip" />
|
|
49 <param name="input_target" value="Ssuis.fasta" ftype="fasta" />
|
|
50 <param name="input_query" value="shrimp_cs_test1.csfasta" ftype="csfasta"/>
|
|
51 <output name="output1" file="shrimp_cs_test1.out" />
|
|
52 </test>
|
|
53 </tests>
|
|
54 <help>
|
|
55
|
|
56 .. class:: warningmark
|
|
57
|
|
58 To use this tool, your dataset needs to be in the *csfasta* format (ABI SOLiD color-space sequences). Click the pencil icon next to your dataset to set the datatype to *csfasta*.
|
|
59
|
|
60
|
|
61 -----
|
|
62
|
|
63 **What it does**
|
|
64
|
|
65 SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome.
|
|
66
|
|
67
|
|
68 -----
|
|
69
|
|
70 **Input formats**
|
|
71
|
|
72 A multiple color-space file, for example::
|
|
73
|
|
74 >2_263_779_F3
|
|
75 T132032030200202202003211302222202230022110222
|
|
76
|
|
77
|
|
78 -----
|
|
79
|
|
80 **Outputs**
|
|
81
|
|
82 The tool returns the default SHRiMP output::
|
|
83
|
|
84
|
|
85 1 2 3 4 5 6 7 8 9 10
|
|
86 --------------------------------------------------------------------------------------------------------------------
|
|
87 >2_263_779_F3 Streptococcus_suis + 814344 814388 1 45 45 3660 8x19x3x2x6x4x3
|
|
88
|
|
89 where::
|
|
90
|
|
91 1. (>2_263_779_F3) - Read id
|
|
92 2. (Streptococcus_suis) - Reference sequence id
|
|
93 3. (+) - Strand of the read
|
|
94 4. (814344) - Start position of the alignment in the reference
|
|
95 5. (814388) - End position of the alignment in the reference
|
|
96 6. (1) - Start position of the alignment in the read
|
|
97 7. (45) - End position of the alignment in the read
|
|
98 8. (45) - Length of the read
|
|
99 9. (3660) - Score
|
|
100 10. (8x19x3x2x6x4x3) - Edit string
|
|
101
|
|
102
|
|
103 -----
|
|
104
|
|
105 **SHRiMP parameter list**
|
|
106
|
|
107 The commonly used parameters with default value setting::
|
|
108
|
|
109 -s Spaced Seed (default: 1111001111)
|
|
110 The spaced seed is a single contiguous string of 0's and 1's.
|
|
111 0's represent wildcards, or positions which will always be
|
|
112 considered as matching, whereas 1's dictate positions that
|
|
113 must match. A string of all 1's will result in a simple kmer scan.
|
|
114 -n Seed Matches per Window (default: 2)
|
|
115 The number of seed matches per window dictates how many seeds
|
|
116 must match within some window length of the genome before that
|
|
117 region is considered for Smith-Waterman alignment. A lower
|
|
118 value will increase sensitivity while drastically increasing
|
|
119 running time. Higher values will have the opposite effect.
|
|
120 -t Seed Hit Taboo Length (default: 4)
|
|
121 The seed taboo length specifies how many target genome bases
|
|
122 or colours must exist prior to a previous seed match in order
|
|
123 to count another seed match as a hit.
|
|
124 -9 Seed Generation Taboo Length (default: 0)
|
|
125
|
|
126 -w Seed Window Length (default: 115.00%)
|
|
127 This parameter specifies the genomic span in bases (or colours)
|
|
128 in which *seed_matches_per_window* must exist before the read
|
|
129 is given consideration by the Smith-Waterman alignment machinery.
|
|
130 -o Maximum Hits per Read (default: 100)
|
|
131 This parameter specifies how many hits to remember for each read.
|
|
132 If more hits are encountered, ones with lower scores are dropped
|
|
133 to make room.
|
|
134 -r Maximum Read Length (default: 1000)
|
|
135 This parameter specifies the maximum length of reads that will
|
|
136 be encountered in the dataset. If larger reads than the default
|
|
137 are used, an appropriate value must be passed to *rmapper*.
|
|
138 -d Kmer Std. Deviation Limit (default: -1 [None])
|
|
139 This option permits pruning read kmers, which occur with
|
|
140 frequencies greater than *kmer_std_dev_limit* standard
|
|
141 deviations above the average. This can shorten running
|
|
142 time at the cost of some sensitivity.
|
|
143 *Note*: A negative value disables this option.
|
|
144 -m S-W Match Value (default: 100)
|
|
145 The value applied to matches during the Smith-Waterman score calculation.
|
|
146 -i S-W Mismatch Value (default: -150)
|
|
147 The value applied to mismatches during the Smith-Waterman
|
|
148 score calculation.
|
|
149 -g S-W Gap Open Penalty (Reference) (default: -400)
|
|
150 The value applied to gap opens along the reference sequence
|
|
151 during the Smith-Waterman score calculation.
|
|
152 *Note*: For backward compatibility, if -g is set
|
|
153 and -q is not set, the gap open penalty for the query will
|
|
154 be set to the same value as specified for the reference.
|
|
155 -q S-W Gap Open Penalty (Query) (default: -400)
|
|
156 The value applied to gap opens along the query sequence during
|
|
157 the Smith-Waterman score calculation.
|
|
158 -e S-W Gap Extend Penalty (Reference) (default: -70)
|
|
159 The value applied to gap extends during the Smith-Waterman score calculation.
|
|
160 *Note*: For backward compatibility, if -e is set
|
|
161 and -f is not set, the gap extend penalty for the query will
|
|
162 be set to the same value as specified for the reference.
|
|
163 -f S-W Gap Extend Penalty (Query) (default: -70)
|
|
164 The value applied to gap extends during the Smith-Waterman score calculation.
|
|
165 -x S-W Crossover Penalty (default: -140)
|
|
166 -h S-W Full Hit Threshold (default: 68.00%)
|
|
167 In letter-space, this parameter determines the threshold
|
|
168 score for both vectored and full Smith-Waterman alignments.
|
|
169 Any values less than this quantity will be thrown away.
|
|
170 *Note*: This option differs slightly in meaning between letter-space and color-space.
|
|
171 -v S-W Vector Hit Threshold (default: 60.00%)
|
|
172
|
|
173
|
|
174 -----
|
|
175
|
|
176 **Reference**
|
|
177
|
|
178 **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu.
|
|
179
|
|
180 </help>
|
|
181 </tool>
|