annotate cpt_renumber_gbk/renumber.xml @ 0:8cac332dbc77 draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 13:13:47 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
1 <?xml version="1.0"?>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
2 <tool id="edu.tamu.cpt.genbank.RelabelTags" name="Renumber GenBank Genes" version="0.4" profile="16.04">
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
3 <description>relabels/renumbers GenBank tags according to rules</description>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
4 <macros>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
5 <import>macros.xml</import>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
6 <import>cpt-macros.xml</import>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
7 </macros>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
8 <expand macro="requirements"/>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
9 <command detect_errors="aggressive"><![CDATA[
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
10 python $__tool_directory__/renumber.py
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
11 $file
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
12 --tag_to_update "${tag_to_update}"
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
13 --string_prefix "${string_prefix}"
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
14 --leading_zeros "${leading_zeros}"
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
15 $forceMatch
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
16 --change_table $change_table
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
17
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
18 > $output
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
19
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
20 ]]></command>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
21 <inputs>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
22 <param label="GenBank file" name="file" type="data" format="genbank" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
23 <param help="Which tag is used to store gene numbers" label="Tag to update"
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
24 name="tag_to_update" type="text" value="locus_tag"/>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
25 <param help="A string to use as a prefix for the numbering. Will be used as XXXXXXNNN where XXXXXX is the string and NNN is a numerical identifier. Using &quot;display_id&quot; has special meaning, it will use the genome's name/accession number"
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
26 label="String prefix" name="string_prefix" type="text" value="display_id"/>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
27 <param label="Number of leading zeros/padding" name="leading_zeros"
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
28 type="integer" value="3"/>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
29 <param name="forceMatch" label="Force Updated Tags to initially match in addition to location checks. " help="If tag is not present, only location and type checks will be used to infer renumber" type="boolean" truevalue="--forceTagMatch" falsevalue="" checked="True" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
30 </inputs>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
31 <outputs>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
32 <data format="genbank" name="output">
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
33 </data>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
34 <data format="tabular" name="change_table">
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
35 </data>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
36 </outputs>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
37 <tests>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
38 <test>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
39 <param name="file" value="MS105.genbank" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
40 <param name="leading_zeros" value="10" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
41 <param name="forceMatch" value="" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
42 <param name="string_prefix" value="MS105_" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
43 <output name="output" value="renumbered.gbk" /> <!-- name must match the <data name="output"> declared in <outputs> -->
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
44 <output name="change_table" value="renumbered.tsv" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
45 </test>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
46 </tests>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
47 <help>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
48 Gene Renumbering Tool
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
49 =====================
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
50
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
51 Renumber genes in a genome.
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
52
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
53 The tool attempts to group subfeatures, such as CDS or intron, with their associated gene, based on location. CDSs and RBSs must share either a start or an end boundary with their parent gene, and also fall entirely within the boundary of said gene. All other features only need to fall within the boundary of a gene.
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
54
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
55 If the tag check is enabled, then for whatever qualifier is selected for updating (such as locus_tag), the subfeatures must also have the same value as any candidate parent gene has for that qualifier. This is useful for subfeatures such as introns, which may be inside more than one gene and could potentially get renumbered to the wrong parent in a location-only check. However, if your dataset does not already have a consistent naming convention, other valid hierarchies could get dropped. The log file will list what features got dropped based on tag checks, so be sure to use that to verify all data made it through.
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
56 </help>
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
57 <expand macro="citations-2020" />
8cac332dbc77 Uploaded
cpt
parents:
diff changeset
58 </tool>