<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool definition: "Extract features from GFF data".

  Runs extract_GFF_Features.py with four arguments, in this order:
    1. the input GFF dataset (input1)
    2. the output dataset (out_file1)
    3. the zero-based index of the column to match on (column_choice.col)
    4. the value(s) selected from that column (column_choice.feature)
-->
<tool id="Extract_features1" name="Extract features" version="1.0.0">
    <description>from GFF data</description>
    <!--
      Dataset paths and the selected values are single-quoted so that a value
      containing whitespace or shell metacharacters reaches the script as one
      argument.
      NOTE(review): the interpreter attribute is deprecated in current Galaxy
      releases; it is kept here so the tool keeps loading on older servers.
    -->
    <command interpreter="python">extract_GFF_Features.py '$input1' '$out_file1' ${column_choice.col} '${column_choice.feature}'</command>
    <inputs>
        <param format="gff" name="input1" type="data" label="Select GFF data"/>
        <!--
          The option values of "col" are zero-based column indexes. Each <when>
          below matches one of them and offers the unique values found in that
          column of input1 as a multi-select list.
        -->
        <conditional name="column_choice">
            <param name="col" type="select" label="From">
                <option value="0" selected="true">Column 1 / Sequence name</option>
                <option value="1">Column 2 / Source</option>
                <option value="2">Column 3 / Feature</option>
                <option value="6">Column 7 / Strand</option>
                <option value="7">Column 8 / Frame</option>
            </param>
            <!-- Column 1 / Sequence name -->
            <when value="0">
                <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                    <options from_dataset="input1">
                        <column name="name" index="0"/>
                        <column name="value" index="0"/>
                        <filter type="unique_value" name="unique" column="0"/>
                    </options>
                </param>
            </when>
            <!-- Column 2 / Source -->
            <when value="1">
                <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                    <options from_dataset="input1">
                        <column name="name" index="1"/>
                        <column name="value" index="1"/>
                        <filter type="unique_value" name="unique" column="1"/>
                    </options>
                </param>
            </when>
            <!-- Column 3 / Feature -->
            <when value="2">
                <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                    <options from_dataset="input1">
                        <column name="name" index="2"/>
                        <column name="value" index="2"/>
                        <filter type="unique_value" name="unique" column="2"/>
                    </options>
                </param>
            </when>
            <!-- Column 7 / Strand -->
            <when value="6">
                <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                    <options from_dataset="input1">
                        <column name="name" index="6"/>
                        <column name="value" index="6"/>
                        <filter type="unique_value" name="unique" column="6"/>
                    </options>
                </param>
            </when>
            <!-- Column 8 / Frame -->
            <when value="7">
                <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                    <options from_dataset="input1">
                        <column name="name" index="7"/>
                        <column name="value" index="7"/>
                        <filter type="unique_value" name="unique" column="7"/>
                    </options>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The output takes its datatype and metadata from input1; format_source
             is the documented replacement for the deprecated format="input". -->
        <data name="out_file1" format_source="input1" metadata_source="input1"/>
    </outputs>
    <tests>
        <test>
            <param name="input1" value="5.gff"/>
            <param name="col" value="0"/>
            <param name="feature" value="chr5,chr6,chr7,chr8"/>
            <output name="out_file1" file="Extract_features1_out.gff"/>
        </test>
    </tests>
    <help><![CDATA[

**What it does**

This tool extracts selected features from GFF data.

-----

**Example**

Selecting **promoter** from the following GFF data::

    chr22  GeneA  enhancer  10000000  10001000  500  +  .  TGA
    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
    chr22  GeneB  promoter  10020000  10025000  400  -  .  TGB
    chr22  GeneB  CCDS2220  10030000  10065000  800  -  .  TGB

will produce the following output::

    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
    chr22  GeneB  promoter  10020000  10025000  400  -  .  TGB

----

.. class:: infomark

**About formats**

**GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields::

    1. seqname - Must be a chromosome or scaffold.
    2. source - The program that generated this feature.
    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
    5. end - The ending position of the feature (inclusive).
    6. score - A score between 0 and 1000. If there is no score value, enter ".".
    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
    9. group - All lines with the same group are linked together into a single item.

    ]]></help>
</tool>