annotate microsatbed/microsatbed.xml @ 0:50a1636fde68 draft default tip

Uploaded
author fubar
date Sun, 14 Jul 2024 02:32:13 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
50a1636fde68 Uploaded
fubar
parents:
diff changeset
1 <tool name="STR to bed" id="microsatbedfubar" version="1.3.0" profile="22.05">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
2 <description>Short Tandem Repeats to bed features from fasta</description>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
3 <requirements>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
4 <requirement version="3.12.3" type="package">python</requirement>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
5 <requirement version="2.1.0" type="package">pyfastx</requirement>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
6 <requirement version="1.3.0" type="package">pytrf</requirement>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
7 </requirements>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
8 <required_files>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
9 <include path="find_str.py"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
10 </required_files>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
11 <version_command><![CDATA[python -c "import pytrf; from importlib.metadata import version; print(version('pytrf'))"]]></version_command>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
12 <command><![CDATA[
50a1636fde68 Uploaded
fubar
parents:
diff changeset
13 #if $mode_cond.mode == "NATIVE":
50a1636fde68 Uploaded
fubar
parents:
diff changeset
14 pytrf findstr -f '$mode_cond.outformat' -o '$bed' -r $mode_cond.monomin $mode_cond.dimin $mode_cond.trimin $mode_cond.tetramin $mode_cond.pentamin $mode_cond.hexamin '${reference_genome.fasta}'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
15 #else:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
16 python '${__tool_directory__}/find_str.py'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
17 --fasta '${reference_genome.fasta}'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
18 --bed '$bed'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
19 #if $mode_cond.mode == "SPECIFIC":
50a1636fde68 Uploaded
fubar
parents:
diff changeset
20 --specific '$mode_cond.specific'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
21 --minreps '$mode_cond.minreps'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
22 #else:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
23 #if "MONO" in $mode_cond.subset:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
24 --mono
50a1636fde68 Uploaded
fubar
parents:
diff changeset
25 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
26 #if "DI" in $mode_cond.subset:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
27 --di
50a1636fde68 Uploaded
fubar
parents:
diff changeset
28 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
29 #if "TRI" in $mode_cond.subset:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
30 --tri
50a1636fde68 Uploaded
fubar
parents:
diff changeset
31 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
32 #if "TETRA" in $mode_cond.subset:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
33 --tetra
50a1636fde68 Uploaded
fubar
parents:
diff changeset
34 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
35 #if "PENTA" in $mode_cond.subset:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
36 --penta
50a1636fde68 Uploaded
fubar
parents:
diff changeset
37 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
38 #if "HEXA" in $mode_cond.subset:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
39 --hexa
50a1636fde68 Uploaded
fubar
parents:
diff changeset
40 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
41 --monomin '$mode_cond.monomin'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
42 --dimin '$mode_cond.dimin'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
43 --trimin '$mode_cond.trimin'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
44 --tetramin '$mode_cond.tetramin'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
45 --pentamin '$mode_cond.pentamin'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
46 --hexamin '$mode_cond.hexamin'
50a1636fde68 Uploaded
fubar
parents:
diff changeset
47 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
48 #end if
50a1636fde68 Uploaded
fubar
parents:
diff changeset
49 ]]></command>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
50 <inputs>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
51 <conditional name="reference_genome">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
52 <param name="genome_type_select" type="select" label="Select a source for fasta sequences to be searched for STRs" help="Options are to choose a built-in genome, or choose any history fasta file">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
53 <option value="indexed">Use a Galaxy server built-in reference genome fasta</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
54 <option selected="True" value="history">Use any fasta file from the current history</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
55 </param>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
56 <when value="indexed">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
57 <param name="fasta" type="select" optional="false" multiple="false" label="Choose a built-in, or custom reference genome"
50a1636fde68 Uploaded
fubar
parents:
diff changeset
58 help="If the genome you need is not on the list, add a custom genome or choose a genome fasta from the current history">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
59 <options from_data_table="all_fasta">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
60 <filter column="2" type="sort_by"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
61 <validator message="No genomes are available" type="no_options"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
62 </options>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
63 </param>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
64 </when>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
65 <when value="history">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
66 <param name="fasta" type="data" format="fasta,fasta.gz" optional="false" multiple="false" label="Choose a fasta file from the current history"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
67 </when>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
68 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
69 <conditional name="mode_cond">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
70 <param name="mode" type="select" label="Select patterns by motif length; or provide a specific motif pattern to report?" help="Choose *By length:*, *By motif:* or *All exact STR:* to configure STR selection mode">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
71 <option selected="True" value="ALL">By length: Report all motifs of one or more specified lengths (1-6nt) as bed features</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
72 <option value="SPECIFIC">By motif: Report one or more specific motifs (such as TCA,GC) as bed features</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
73 <option value="NATIVE">All exact STR: use the pytrf findstr native command to create a gff, csv or tsv output</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
74 </param>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
75 <when value="ALL">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
76 <param name="subset" type="select" multiple="true" optional="false" label="Select at least 1 specific motif length to report"
50a1636fde68 Uploaded
fubar
parents:
diff changeset
77 help="Bed features will be output for every motif of the selected length(s) with the minimum required repeats or more">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
78 <option value="DI" selected="true">All dimers (AC,AG,AT,...)</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
79 <option value="TRI">All trimers (ACG,..)</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
80 <option value="TETRA">All tetramers (ACGT,..)</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
81 <option value="PENTA">All pentamers (ACGTC,..)</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
82 <option value="HEXA">All hexamers (ACGTCG,..)</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
83 <option value="MONO">All monomers (A,C...). Warning! Can produce overwhelming numbers of bed features</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
84 </param>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
85 <param name="dimin" type="integer" value="2" min="1" label="Minimum number of repeats for dimers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
86 <param name="trimin" type="integer" value="2" min="2" label="Minimum number of repeats for trimers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
87 <param name="tetramin" type="integer" value="2" min="2" label="Minimum number of repeats for tetramers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
88 <param name="pentamin" type="integer" value="2" min="2" label="Minimum number of repeats for pentamers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
89 <param name="hexamin" type="integer" value="2" min="2" label="Minimum number of repeats for hexamers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
90 <param name="monomin" type="integer" value="2" min="2" label="Minimum number of repeats for monomers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
91 </when>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
92 <when value="SPECIFIC">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
93 <param name="specific" type="text" label="Supply a specific motif pattern. Separate multiple patterns with commas such as GA,GC"
50a1636fde68 Uploaded
fubar
parents:
diff changeset
94 help="Make bed features only for the nominated specific motifs." optional="false"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
95 <param name="minreps" type="integer" value="2" min="2" label="Minimum number of repeats of each of these motifs to report"
50a1636fde68 Uploaded
fubar
parents:
diff changeset
96 help="Short tandem repeats require 2 or more consecutive motifs by definition. WARNING: If monomers are included, every single matching base will be reported as a STR if minimum repeats = 1!"
50a1636fde68 Uploaded
fubar
parents:
diff changeset
97 optional="false"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
98 </when>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
99 <when value="NATIVE">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
100 <param name="outformat" type="select" optional="false" label="Select the required output format"
50a1636fde68 Uploaded
fubar
parents:
diff changeset
101 help="Pytrf can create GFF, CSV or TSV output files">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
102 <option value="gff" selected="true">GFF</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
103 <option value="csv">Comma separated values</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
104 <option value="tsv">Tab separated values</option>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
105 </param>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
106 <param name="dimin" type="integer" value="2" min="1" label="Minimum number of repeats for dimers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
107 <param name="trimin" type="integer" value="2" min="2" label="Minimum number of repeats for trimers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
108 <param name="tetramin" type="integer" value="2" min="2" label="Minimum number of repeats for tetramers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
109 <param name="pentamin" type="integer" value="2" min="2" label="Minimum number of repeats for pentamers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
110 <param name="hexamin" type="integer" value="2" min="2" label="Minimum number of repeats for hexamers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
111 <param name="monomin" type="integer" value="2" min="2" label="Minimum number of repeats for monomers"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
112 </when>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
113 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
114 </inputs>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
115 <outputs>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
116 <data name="bed" format="bed" label="STR from $fasta.element_identifier" hidden="false">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
117 <change_format>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
118 <when input="mode_cond.outformat" value="gff" format="gff" />
50a1636fde68 Uploaded
fubar
parents:
diff changeset
119 <when input="mode_cond.outformat" value="csv" format="csv" />
50a1636fde68 Uploaded
fubar
parents:
diff changeset
120 <when input="mode_cond.outformat" value="tsv" format="tabular" />
50a1636fde68 Uploaded
fubar
parents:
diff changeset
121 </change_format>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
122 </data>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
123 </outputs>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
124 <tests>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
125 <test expect_num_outputs="1">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
126 <conditional name="reference_genome">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
127 <param name="genome_type_select" value="history"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
128 <param name="fasta" value="humsamp.fa"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
129 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
130 <conditional name="mode_cond">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
131 <param name="mode" value="ALL"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
132 <param name="subset" value="DI,TRI,TETRA,PENTA,HEXA"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
133 <param name="dimin" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
134 <param name="trimin" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
135 <param name="tetramin" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
136 <param name="pentamin" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
137 <param name="hexamin" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
138 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
139 <output name="bed" value="bed_sample" compare="diff" lines_diff="0"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
140 </test>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
141 <test expect_num_outputs="1">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
142 <conditional name="reference_genome">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
143 <param name="genome_type_select" value="history"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
144 <param name="fasta" value="humsamp.fa"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
145 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
146 <conditional name="mode_cond">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
147 <param name="mode" value="SPECIFIC"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
148 <param name="specific" value="GC"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
149 <param name="minreps" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
150 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
151 <output name="bed" value="dibed_sample" compare="diff" lines_diff="0"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
152 </test>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
153 <test expect_num_outputs="1">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
154 <conditional name="reference_genome">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
155 <param name="genome_type_select" value="history"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
156 <param name="fasta" value="mouse.fa"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
157 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
158 <conditional name="mode_cond">
50a1636fde68 Uploaded
fubar
parents:
diff changeset
159 <param name="mode" value="NATIVE"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
160 <param name="outformat" value="gff"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
161 <param name="monomin" value="20"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
162 <param name="dimin" value="10"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
163 <param name="trimin" value="5"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
164 <param name="tetramin" value="4"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
165 <param name="pentamin" value="4"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
166 <param name="hexamin" value="2"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
167 </conditional>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
168 <output name="bed" value="nativegff_sample" compare="diff" lines_diff="0"/>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
169 </test>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
170 </tests>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
171 <help><![CDATA[
50a1636fde68 Uploaded
fubar
parents:
diff changeset
172
50a1636fde68 Uploaded
fubar
parents:
diff changeset
173 **Convert short repetitive sequences to bed features**
50a1636fde68 Uploaded
fubar
parents:
diff changeset
174
50a1636fde68 Uploaded
fubar
parents:
diff changeset
175 Microsatellites are usually defined as repeated short DNA patterns in an unbroken sequence.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
176 A microsatellite pattern or *motif* can be any combination of nucleotides, typically from 1 to 6nt in length.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
177
50a1636fde68 Uploaded
fubar
parents:
diff changeset
178 This tool allows microsatellite and related features to be selected from a fasta sequence input file, and output into a single bed track, suitable for viewing in a genome browser such as JBrowse2.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
179
50a1636fde68 Uploaded
fubar
parents:
diff changeset
180 All motifs of selected lengths can be reported as individual features in the output bed file, or specific motifs can be provided and all
50a1636fde68 Uploaded
fubar
parents:
diff changeset
181 others will be ignored. In all cases, a minimum required number of repeats can be specified. For example, requiring 2 or more repeats of the trimer *ACG* will report
50a1636fde68 Uploaded
fubar
parents:
diff changeset
182 every sequence of *ACGACG* or *ACGACGACG* or *ACGACGACGACG* and so on, as individual bed features. Similarly, requiring 3 repeats of any trimer will
50a1636fde68 Uploaded
fubar
parents:
diff changeset
183 report every distinct 3 nucleotide pattern, including *ACGACGACG* as well as every other unique 3 nucleotide pattern with 3 sequential repeats or more, such as *CTCCTCCTC*.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
184
50a1636fde68 Uploaded
fubar
parents:
diff changeset
185 For other output formats, the pytrf native command line *findstr* can be used to produce a gff, csv or tsv output containing all exact short tandem repeats, as
50a1636fde68 Uploaded
fubar
parents:
diff changeset
186 described at the end of https://pytrf.readthedocs.io/en/latest
50a1636fde68 Uploaded
fubar
parents:
diff changeset
187
50a1636fde68 Uploaded
fubar
parents:
diff changeset
188 A fasta file must be supplied for processing. A built in genome can be selected, or a fasta file of any kind can be selected from the current history. Note that all
50a1636fde68 Uploaded
fubar
parents:
diff changeset
189 symbols are treated as valid nucleotides by pytrf, so extraneous characters such as *-* or *N* in the input fasta may appear as unexpected bed features. Lower case fasta symbols will be converted
50a1636fde68 Uploaded
fubar
parents:
diff changeset
190 to uppercase, to prevent them being reported as distinct motifs.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
191
50a1636fde68 Uploaded
fubar
parents:
diff changeset
192
50a1636fde68 Uploaded
fubar
parents:
diff changeset
193 **Select motifs by length**
50a1636fde68 Uploaded
fubar
parents:
diff changeset
194
50a1636fde68 Uploaded
fubar
parents:
diff changeset
195 The default tool form setting is to select all dimer motif patterns.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
196
50a1636fde68 Uploaded
fubar
parents:
diff changeset
197 Additional motif lengths from 1 to 6nt can be selected in the multiple-select drop-down list. All features will be returned in a single bed file. For each selected motif length,
50a1636fde68 Uploaded
fubar
parents:
diff changeset
198 the minimum number of repeats required for reporting can be adjusted. **Tandem repeats** are defined as at least 2 of any pattern. This tool allows singleton motifs to be reported,
50a1636fde68 Uploaded
fubar
parents:
diff changeset
199 so it is not restricted to short tandem repeats (STR).
50a1636fde68 Uploaded
fubar
parents:
diff changeset
200
50a1636fde68 Uploaded
fubar
parents:
diff changeset
201 **Select motifs by pattern**
50a1636fde68 Uploaded
fubar
parents:
diff changeset
202
50a1636fde68 Uploaded
fubar
parents:
diff changeset
203 This option allows a motif pattern to be specified as a text string such as *CG* or *ATC*. Multiple motifs can be specified as a comma separated string such as *CG,ATC*.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
204 All features will be returned as a single bed file.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
205
50a1636fde68 Uploaded
fubar
parents:
diff changeset
206 The minimum number of repeats for all motifs can be set to match specific requirements.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
207
50a1636fde68 Uploaded
fubar
parents:
diff changeset
208 For example, technical sequencing read bias may be influenced by the density of specific dimers, whether they are repeated or not
50a1636fde68 Uploaded
fubar
parents:
diff changeset
209 such as in https://github.com/arangrhie/T2T-Polish/tree/master/pattern
50a1636fde68 Uploaded
fubar
parents:
diff changeset
210
50a1636fde68 Uploaded
fubar
parents:
diff changeset
211 **Select all perfect STR using pytrf findstr in csv, tsv or gff output format**
50a1636fde68 Uploaded
fubar
parents:
diff changeset
212
50a1636fde68 Uploaded
fubar
parents:
diff changeset
213 This selection runs the pytrf *findstr* option to create gff/csv/tsv outputs as described at the end of https://pytrf.readthedocs.io/en/latest/.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
214
50a1636fde68 Uploaded
fubar
parents:
diff changeset
215 Quoted here:
50a1636fde68 Uploaded
fubar
parents:
diff changeset
216
50a1636fde68 Uploaded
fubar
parents:
diff changeset
217 *A Tandem repeat (TR) in genomic sequence is a set of adjacent short DNA sequence repeated consecutively. The core sequence or repeat unit is generally called motif.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
218 According to the motif length, tandem repeats can be classified as microsatellites and minisatellites. Microsatellites are also known as simple sequence repeats (SSRs)
50a1636fde68 Uploaded
fubar
parents:
diff changeset
219 or short tandem repeats (STRs) with motif length of 1-6 bp. Minisatellites are also sometimes referred to as variable number of tandem repeats (VNTRs) has longer motif length than microsatellites.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
220 Pytrf is a lightweight Python C extension for identification of tandem repeats. The pytrf enables to fastly identify both exact or perfect SSRs.
50a1636fde68 Uploaded
fubar
parents:
diff changeset
221 It also can find generic tandem repeats with any size of motif, such as with maximum motif length of 100 bp. Additionally, it has capability of finding approximate or imperfect tandem repeats*
50a1636fde68 Uploaded
fubar
parents:
diff changeset
222
50a1636fde68 Uploaded
fubar
parents:
diff changeset
223 ]]></help>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
224 <citations>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
225 <citation type="bibtex">@misc{pytrf,
50a1636fde68 Uploaded
fubar
parents:
diff changeset
226 title = {{pytrf} Short tandem repeat finder, Accessed on July 10 2024},
50a1636fde68 Uploaded
fubar
parents:
diff changeset
227 howpublished = {\url{https://github.com/lmdu/pytrf}},
50a1636fde68 Uploaded
fubar
parents:
diff changeset
228 note = {Accessed on July 10 2024}
50a1636fde68 Uploaded
fubar
parents:
diff changeset
229 }</citation>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
230 </citations>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
231 </tool>
50a1636fde68 Uploaded
fubar
parents:
diff changeset
232