annotate microsatbed.xml @ 7:f27be15cc58d draft

Uploaded
author fubar
date Sun, 14 Jul 2024 23:34:26 +0000
parents
children 01c16e8fbc91
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
f27be15cc58d Uploaded
fubar
parents:
diff changeset
1 <tool name="STR to bed" id="microsatbed" version="1.3.0" profile="22.05">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
2 <description>Short Tandem Repeats to bed features from fasta</description>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
3 <requirements>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
4 <requirement version="3.12.3" type="package">python</requirement>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
5 <requirement version="2.1.0" type="package">pyfastx</requirement>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
6 <requirement version="1.3.0" type="package">pytrf</requirement>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
7 </requirements>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
8 <required_files>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
9 <include path="find_str.py"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
10 </required_files>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
11 <version_command><![CDATA[python -c "import pytrf; from importlib.metadata import version; print(version('pytrf'))"]]></version_command>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
12 <command><![CDATA[
f27be15cc58d Uploaded
fubar
parents:
diff changeset
13 #if $mode_cond.mode == "NATIVE":
f27be15cc58d Uploaded
fubar
parents:
diff changeset
14 #if $reference_genome.genome_type_select == "history":
f27be15cc58d Uploaded
fubar
parents:
diff changeset
15 pytrf findstr -f '$mode_cond.outformat' -o '$bed' -r $mode_cond.monomin $mode_cond.dimin $mode_cond.trimin $mode_cond.tetramin $mode_cond.pentamin $mode_cond.hexamin '${reference_genome.fasta}'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
16 #else:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
17 pytrf findstr -f '$mode_cond.outformat' -o '$bed' -r $mode_cond.monomin $mode_cond.dimin $mode_cond.trimin $mode_cond.tetramin $mode_cond.pentamin $mode_cond.hexamin '${reference_genome.fasta.fields.path}'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
18 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
19 #else:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
20 python '${__tool_directory__}/find_str.py'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
21 #if $reference_genome.genome_type_select == "history":
f27be15cc58d Uploaded
fubar
parents:
diff changeset
22 --fasta '${reference_genome.fasta}'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
23 #else:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
24 --fasta '${reference_genome.fasta.fields.path}'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
25 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
26 --bed '$bed'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
27 #if $mode_cond.mode == "SPECIFIC":
f27be15cc58d Uploaded
fubar
parents:
diff changeset
28 --specific '$mode_cond.specific'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
29 --minreps '$mode_cond.minreps'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
30 #else:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
31 #if "MONO" in $mode_cond.subset:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
32 --mono
f27be15cc58d Uploaded
fubar
parents:
diff changeset
33 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
34 #if "DI" in $mode_cond.subset:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
35 --di
f27be15cc58d Uploaded
fubar
parents:
diff changeset
36 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
37 #if "TRI" in $mode_cond.subset:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
38 --tri
f27be15cc58d Uploaded
fubar
parents:
diff changeset
39 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
40 #if "TETRA" in $mode_cond.subset:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
41 --tetra
f27be15cc58d Uploaded
fubar
parents:
diff changeset
42 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
43 #if "PENTA" in $mode_cond.subset:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
44 --penta
f27be15cc58d Uploaded
fubar
parents:
diff changeset
45 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
46 #if "HEXA" in $mode_cond.subset:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
47 --hexa
f27be15cc58d Uploaded
fubar
parents:
diff changeset
48 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
49 --monomin '$mode_cond.monomin'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
50 --dimin '$mode_cond.dimin'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
51 --trimin '$mode_cond.trimin'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
52 --tetramin '$mode_cond.tetramin'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
53 --pentamin '$mode_cond.pentamin'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
54 --hexamin '$mode_cond.hexamin'
f27be15cc58d Uploaded
fubar
parents:
diff changeset
55 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
56 #end if
f27be15cc58d Uploaded
fubar
parents:
diff changeset
57 ]]></command>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
58 <inputs>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
59 <conditional name="reference_genome">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
60 <param name="genome_type_select" type="select" label="Select a source for fasta sequences to be searched for STRs" help="Options are to choose a built-in genome, or choose any history fasta file">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
61 <option value="indexed">Use a Galaxy server built-in reference genome fasta</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
62 <option value="history" selected="True">Use any fasta file from the current history</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
63 </param>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
64 <when value="indexed">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
65 <param name="fasta" type="select" multiple="false" label="Choose a built-in genome"
f27be15cc58d Uploaded
fubar
parents:
diff changeset
66 help="If the genome you need is not on the list, upload it and select it as a current history fasta">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
67 <options from_data_table="all_fasta"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
68 </param>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
69 </when>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
70 <when value="history">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
71 <param name="fasta" type="data" format="fasta,fasta.gz" optional="false" multiple="false" label="Choose a fasta file from the current history"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
72 </when>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
73 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
74 <conditional name="mode_cond">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
75 <param name="mode" type="select" label="Select patterns by motif length; or provide a specific motif pattern to report?" help="Choose *By length:* or *By pattern:* to configure STR selection mode">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
76 <option selected="True" value="ALL">By length: Report all motifs of one or more specified lengths (1-6nt) as bed features</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
77 <option value="SPECIFIC">By motif: Report one or more specific motifs (such as TCA,GC) as bed features</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
78 <option value="NATIVE">All exact STR: use the pytrf findstr native command to create a csv, tsv or gff output</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
79 </param>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
80 <when value="ALL">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
81 <param name="subset" type="select" multiple="true" optional="false" label="Select at least 1 specific motif length to report"
f27be15cc58d Uploaded
fubar
parents:
diff changeset
82 help="Bed features will be output for every motif of the selected length(s) with the minimum required repeats or more">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
83 <option value="DI" selected="true">All dimers (AC,AG,AT,...)</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
84 <option value="TRI">All trimers (ACG,..)</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
85 <option value="TETRA">All tetramers (ACGT,..)</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
86 <option value="PENTA">All pentamers (ACGTC,..)</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
87 <option value="HEXA">All hexamers (ACGTCG,..)</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
88 <option value="MONO">All monomers (A,C...). Warning! Can produce overwhelming numbers of bed features</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
89 </param>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
90 <param name="dimin" type="integer" value="2" min="1" label="Minimum number of repeats for dimers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
91 <param name="trimin" type="integer" value="2" min="2" label="Minimum number of repeats for trimers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
92 <param name="tetramin" type="integer" value="2" min="2" label="Minimum number of repeats for tetramers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
93 <param name="pentamin" type="integer" value="2" min="2" label="Minimum number of repeats for pentamers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
94 <param name="hexamin" type="integer" value="2" min="2" label="Minimum number of repeats for hexamers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
95 <param name="monomin" type="integer" value="2" min="2" label="Minimum number of repeats for monomers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
96 </when>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
97 <when value="SPECIFIC">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
98 <param name="specific" type="text" label="Supply a specific motif pattern. Separate multiple patterns with commas such as GA,GC"
f27be15cc58d Uploaded
fubar
parents:
diff changeset
99 help="Make bed features only for the nominated specific motifs." optional="false"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
100 <param name="minreps" type="integer" value="2" min="2" label="Minimum number of repeats of each of these motifs to report"
f27be15cc58d Uploaded
fubar
parents:
diff changeset
101 help="Short tandem repeats require 2 or more consecutive motifs by definition. WARNING: If monomers are included, every single matching base will be reported as a STR if minimum repeats = 1!"
f27be15cc58d Uploaded
fubar
parents:
diff changeset
102 optional="false"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
103 </when>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
104 <when value="NATIVE">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
105 <param name="outformat" type="select" optional="false" label="Select the required output format"
f27be15cc58d Uploaded
fubar
parents:
diff changeset
106 help="Pytrf can create GFF, CSV or TSV output files. Documentation is linked in the help section below">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
107 <option value="gff" >GFF</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
108 <option value="csv">Comma separated values</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
109 <option value="tsv" selected="true">Tab separated values</option>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
110 </param>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
111 <param name="monomin" type="integer" value="2" min="2" label="Minimum number of repeats for monomers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
112 <param name="dimin" type="integer" value="2" min="1" label="Minimum number of repeats for dimers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
113 <param name="trimin" type="integer" value="2" min="2" label="Minimum number of repeats for trimers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
114 <param name="tetramin" type="integer" value="2" min="2" label="Minimum number of repeats for tetramers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
115 <param name="pentamin" type="integer" value="2" min="2" label="Minimum number of repeats for pentamers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
116 <param name="hexamin" type="integer" value="2" min="2" label="Minimum number of repeats for hexamers"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
117 </when>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
118 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
119 </inputs>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
120 <outputs>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
121 <data name="bed" format="bed" label="STR from $fasta.element_identifier" hidden="false">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
122 <change_format>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
123 <when input="mode_cond.outformat" value="gff" format="gff" />
f27be15cc58d Uploaded
fubar
parents:
diff changeset
124 <when input="mode_cond.outformat" value="csv" format="csv" />
f27be15cc58d Uploaded
fubar
parents:
diff changeset
125 <when input="mode_cond.outformat" value="tsv" format="tabular" />
f27be15cc58d Uploaded
fubar
parents:
diff changeset
126 </change_format>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
127 </data>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
128 </outputs>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
129 <tests>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
130 <test expect_num_outputs="1">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
131 <conditional name="reference_genome">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
132 <param name="genome_type_select" value="history"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
133 <param name="fasta" value="humsamp.fa"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
134 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
135 <conditional name="mode_cond">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
136 <param name="mode" value="ALL"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
137 <param name="subset" value="DI,TRI,TETRA,PENTA,HEXA"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
138 <param name="dimin" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
139 <param name="trimin" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
140 <param name="tetramin" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
141 <param name="pentamin" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
142 <param name="hexamin" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
143 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
144 <output name="bed" value="bed_sample" compare="diff" lines_diff="0"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
145 </test>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
146 <test expect_num_outputs="1">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
147 <conditional name="reference_genome">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
148 <param name="genome_type_select" value="history"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
149 <param name="fasta" value="humsamp.fa"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
150 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
151 <conditional name="mode_cond">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
152 <param name="mode" value="SPECIFIC"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
153 <param name="specific" value="GC"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
154 <param name="minreps" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
155 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
156 <output name="bed" value="dibed_sample" compare="diff" lines_diff="0"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
157 </test>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
158 <test expect_num_outputs="1">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
159 <conditional name="reference_genome">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
160 <param name="genome_type_select" value="history"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
161 <param name="fasta" value="mouse.fa"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
162 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
163 <conditional name="mode_cond">
f27be15cc58d Uploaded
fubar
parents:
diff changeset
164 <param name="mode" value="NATIVE"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
165 <param name="outformat" value="gff"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
166 <param name="monomin" value="20"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
167 <param name="dimin" value="10"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
168 <param name="trimin" value="5"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
169 <param name="tetramin" value="4"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
170 <param name="pentamin" value="4"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
171 <param name="hexamin" value="2"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
172 </conditional>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
173 <output name="bed" value="nativegff_sample" compare="diff" lines_diff="0"/>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
174 </test>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
175 </tests>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
176 <help><![CDATA[
f27be15cc58d Uploaded
fubar
parents:
diff changeset
177
f27be15cc58d Uploaded
fubar
parents:
diff changeset
178 **Convert short repetitive sequences to bed features**
f27be15cc58d Uploaded
fubar
parents:
diff changeset
179
f27be15cc58d Uploaded
fubar
parents:
diff changeset
180 Microsatellites are usually defined as repeated short DNA patterns in an unbroken sequence.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
181 A microsatellite pattern or *motif* can be any combination of nucleotides, typically from 1 to 6nt in length.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
182
f27be15cc58d Uploaded
fubar
parents:
diff changeset
183 This tool allows microsatellite and related features to be selected from a fasta sequence input file, and output into a single bed track, suitable for viewing in a genome browser such as JBrowse2.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
184
f27be15cc58d Uploaded
fubar
parents:
diff changeset
185 All motifs of selected lengths can be reported as individual features in the output bed file, or specific motifs can be provided and all
f27be15cc58d Uploaded
fubar
parents:
diff changeset
186 others will be ignored. In all cases, a minimum required number of repeats can be specified. For example, requiring 2 or more repeats of the trimer *ACG* will report
f27be15cc58d Uploaded
fubar
parents:
diff changeset
187 every sequence of *ACGACG* or *ACGACGACG* or *ACGACGACGACG* and so on, as individual bed features. Similarly, requiring 3 repeats of any trimer will
f27be15cc58d Uploaded
fubar
parents:
diff changeset
188 report every distinct 3 nucleotide pattern, including *ACGACGACG* as well as every other unique 3 nucleotide pattern with 3 or more sequential repeats, such as *CTCCTCCTC*.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
189
f27be15cc58d Uploaded
fubar
parents:
diff changeset
190 For other output formats, the pytrf native command line *findstr* can be used to produce a gff, csv or tsv output containing all exact short tandem repeats, as
f27be15cc58d Uploaded
fubar
parents:
diff changeset
191 described at the end of https://pytrf.readthedocs.io/en/latest
f27be15cc58d Uploaded
fubar
parents:
diff changeset
192
f27be15cc58d Uploaded
fubar
parents:
diff changeset
193 A fasta file must be supplied for processing. A built in genome can be selected, or a fasta file of any kind can be selected from the current history. Note that all
f27be15cc58d Uploaded
fubar
parents:
diff changeset
194 symbols are treated as valid nucleotides by pytrf, so extraneous characters such as *-* or *N* in the input fasta may appear as unexpected bed features. Lower case fasta symbols will be converted
f27be15cc58d Uploaded
fubar
parents:
diff changeset
195 to uppercase, to prevent them being reported as distinct motifs.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
196
f27be15cc58d Uploaded
fubar
parents:
diff changeset
197
f27be15cc58d Uploaded
fubar
parents:
diff changeset
198 **Select motifs by length**
f27be15cc58d Uploaded
fubar
parents:
diff changeset
199
f27be15cc58d Uploaded
fubar
parents:
diff changeset
200 The default tool form setting is to select all dimer motif patterns.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
201
f27be15cc58d Uploaded
fubar
parents:
diff changeset
202 Additional motif lengths from 1 to 6nt can be selected in the multiple-select drop-down list. All features will be returned in a single bed file. For each selected motif length,
f27be15cc58d Uploaded
fubar
parents:
diff changeset
203 the minimum number of repeats required for reporting can be adjusted. **Tandem repeats** are defined as at least 2 of any pattern. This tool allows singleton motifs to be reported,
f27be15cc58d Uploaded
fubar
parents:
diff changeset
204 so it is not restricted to short tandem repeats (STR).
f27be15cc58d Uploaded
fubar
parents:
diff changeset
205
f27be15cc58d Uploaded
fubar
parents:
diff changeset
206 **Select motifs by pattern**
f27be15cc58d Uploaded
fubar
parents:
diff changeset
207
f27be15cc58d Uploaded
fubar
parents:
diff changeset
208 This option allows a motif pattern to be specified as a text string such as *CG* or *ATC*. Multiple motifs can be specified as a comma separated string such as *CG,ATC*.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
209 All features will be returned as a single bed file.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
210
f27be15cc58d Uploaded
fubar
parents:
diff changeset
211 The minimum number of repeats for all motifs can be set to match specific requirements.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
212
f27be15cc58d Uploaded
fubar
parents:
diff changeset
213 For example, technical sequencing read bias may be influenced by the density of specific dimers, whether they are repeated or not
f27be15cc58d Uploaded
fubar
parents:
diff changeset
214 such as in https://github.com/arangrhie/T2T-Polish/tree/master/pattern
f27be15cc58d Uploaded
fubar
parents:
diff changeset
215
f27be15cc58d Uploaded
fubar
parents:
diff changeset
216 **Select all perfect STR using pytrf findstr in csv, tsv or gff output format**
f27be15cc58d Uploaded
fubar
parents:
diff changeset
217
f27be15cc58d Uploaded
fubar
parents:
diff changeset
218 This selection runs the pytrf *findstr* option to create gff/csv/tsv outputs as described at the end of https://pytrf.readthedocs.io/en/latest/.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
219
f27be15cc58d Uploaded
fubar
parents:
diff changeset
220 Quoted here:
f27be15cc58d Uploaded
fubar
parents:
diff changeset
221
f27be15cc58d Uploaded
fubar
parents:
diff changeset
222 *A Tandem repeat (TR) in genomic sequence is a set of adjacent short DNA sequence repeated consecutively. The core sequence or repeat unit is generally called motif.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
223 According to the motif length, tandem repeats can be classified as microsatellites and minisatellites. Microsatellites are also known as simple sequence repeats (SSRs)
f27be15cc58d Uploaded
fubar
parents:
diff changeset
224 or short tandem repeats (STRs) with motif length of 1-6 bp. Minisatellites are also sometimes referred to as variable number of tandem repeats (VNTRs) has longer motif length than microsatellites.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
225 Pytrf is a lightweight Python C extension for identification of tandem repeats. The pytrf enables to fastly identify both exact or perfect SSRs.
f27be15cc58d Uploaded
fubar
parents:
diff changeset
226 It also can find generic tandem repeats with any size of motif, such as with maximum motif length of 100 bp. Additionally, it has capability of finding approximate or imperfect tandem repeats*
f27be15cc58d Uploaded
fubar
parents:
diff changeset
227
f27be15cc58d Uploaded
fubar
parents:
diff changeset
228 ]]></help>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
229 <citations>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
230 <citation type="bibtex">@misc{pytrf,
f27be15cc58d Uploaded
fubar
parents:
diff changeset
231 title = {{pytrf} Short tandem repeat finder, Accessed on July 10 2024},
f27be15cc58d Uploaded
fubar
parents:
diff changeset
232 howpublished = {\url{https://github.com/lmdu/pytrf}},
f27be15cc58d Uploaded
fubar
parents:
diff changeset
233 note = {Accessed on July 10 2024}
f27be15cc58d Uploaded
fubar
parents:
diff changeset
234 }</citation>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
235 </citations>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
236 </tool>
f27be15cc58d Uploaded
fubar
parents:
diff changeset
237