0
|
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.2">
|
|
2 <description>using coordinates from assembled/unassembled genomes</description>
|
|
<command interpreter="python">
    ## Run the extraction script. Positional arguments are the input dataset
    ## and the output dataset, followed by the output format (-o) and the
    ## genome build (-d).
    ## Every substituted value is single-quoted so that whitespace or shell
    ## metacharacters in a value (for example a user-edited build name) are
    ## passed through verbatim instead of being word-split or glob-expanded
    ## by the shell.
    ## NOTE(review): a value that itself contains a single quote would still
    ## break the quoting - confirm whether such values are possible here.
    extract_genomic_dna.py '$input' '$out_file1' -o '$out_format' -d '$dbkey'

    ## Add -I only when the interpret_features select is set to "yes".
    #if str( $interpret_features ) == "yes":
        -I
    #end if

    ## Columns to use in input file.
    #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
        ## Input datatype is an instance of the registered 'gff' datatype class:
        ## use fixed column numbers and pass --gff.
        -1 1,4,5,7 --gff
    #else:
        ## Any other input: take the column numbers from the dataset metadata.
        -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol}
    #end if

    #if $seq_source.index_source == "cached":
        ## Genomic data from cache.
        -g '${GALAXY_DATA_INDEX_DIR}'
    #else:
        ## Genomic data from history.
        -F '$seq_source.ref_file'
    #end if
</command>
|
|
<inputs>
    <!-- Dataset (interval or gff format) that supplies the coordinates. -->
    <param name="input" type="data" format="interval,gff" label="Fetch sequences for intervals in"/>

    <!-- The command template adds -I when this is "yes". -->
    <param name="interpret_features" type="select" label="Interpret features when possible" help="Only meaningful for GFF, GTF datasets.">
        <option value="yes">Yes</option>
        <option value="no">No</option>
    </param>

    <!-- Source of the genomic sequence: "cached" needs no further input,
         "history" asks for a fasta dataset (ref_file). -->
    <conditional name="seq_source">
        <param name="index_source" type="select" label="Source for Genomic Data">
            <option value="cached">Locally cached</option>
            <option value="history">History</option>
        </param>
        <when value="cached"/>
        <when value="history">
            <param name="ref_file" type="data" format="fasta" label="Using reference file"/>
        </when>
    </conditional>

    <!-- Passed to the script through -o; also drives change_format on the output. -->
    <param name="out_format" type="select" label="Output data type">
        <option value="fasta">FASTA</option>
        <option value="interval">Interval</option>
    </param>
</inputs>
|
|
<outputs>
    <!-- Single output dataset, declared with format="input" and
         metadata_source="input"; the change_format rule sets the format to
         fasta when the out_format parameter is "fasta". -->
    <data name="out_file1" format="input" metadata_source="input">
        <change_format>
            <when input="out_format" value="fasta" format="fasta"/>
        </change_format>
    </data>
</outputs>
|
|
<requirements>
    <!-- Declares a binary-type requirement named faToTwoBit. -->
    <requirement type="binary">faToTwoBit</requirement>
</requirements>
|
|
<tests>
    <!-- BED input (hg17), cached genome, FASTA output. -->
    <test>
        <param name="input" value="1.bed" ftype="bed" dbkey="hg17"/>
        <param name="interpret_features" value="yes"/>
        <param name="index_source" value="cached"/>
        <param name="out_format" value="fasta"/>
        <output name="out_file1" file="extract_genomic_dna_out1.fasta"/>
    </test>
    <!-- BED input (droPer1), cached genome, FASTA output. -->
    <test>
        <param name="input" value="droPer1.bed" ftype="bed" dbkey="droPer1"/>
        <param name="interpret_features" value="yes"/>
        <param name="index_source" value="cached"/>
        <param name="out_format" value="fasta"/>
        <output name="out_file1" file="extract_genomic_dna_out2.fasta"/>
    </test>
    <!-- BED input (hg17), cached genome, interval output. -->
    <test>
        <param name="input" value="1.bed" ftype="bed" dbkey="hg17"/>
        <param name="interpret_features" value="yes"/>
        <param name="index_source" value="cached"/>
        <param name="out_format" value="interval"/>
        <output name="out_file1" file="extract_genomic_dna_out3.interval"/>
    </test>
    <!-- Test GFF file support. -->
    <!-- GFF input (mm9), cached genome, interval output. -->
    <test>
        <param name="input" value="gff_filter_by_attribute_out1.gff" ftype="gff" dbkey="mm9"/>
        <param name="interpret_features" value="no"/>
        <param name="index_source" value="cached"/>
        <param name="out_format" value="interval"/>
        <output name="out_file1" file="extract_genomic_dna_out4.gff"/>
    </test>
    <!-- GFF input (mm9), cached genome, FASTA output. -->
    <test>
        <param name="input" value="gff_filter_by_attribute_out1.gff" ftype="gff" dbkey="mm9"/>
        <param name="interpret_features" value="no"/>
        <param name="out_format" value="fasta"/>
        <param name="index_source" value="cached"/>
        <output name="out_file1" file="extract_genomic_dna_out5.fasta"/>
    </test>
    <!-- Test custom sequences support and GFF feature interpretation. -->
    <!-- GTF input with a reference FASTA from the history, features not interpreted. -->
    <test>
        <param name="input" value="cufflinks_out1.gtf" ftype="gff" dbkey="mm9"/>
        <param name="interpret_features" value="no"/>
        <param name="index_source" value="history"/>
        <param name="ref_file" value="tophat_in1.fasta"/>
        <param name="out_format" value="fasta"/>
        <output name="out_file1" file="extract_genomic_dna_out6.fasta"/>
    </test>
    <!-- Same inputs as the previous test, with feature interpretation enabled. -->
    <test>
        <param name="input" value="cufflinks_out1.gtf" ftype="gff" dbkey="mm9"/>
        <param name="interpret_features" value="yes"/>
        <param name="index_source" value="history"/>
        <param name="ref_file" value="tophat_in1.fasta"/>
        <param name="out_format" value="fasta"/>
        <output name="out_file1" file="extract_genomic_dna_out7.fasta"/>
    </test>
</tests>
|
|
112 <help>
|
|
113
|
|
114 .. class:: warningmark
|
|
115
|
|
116 This tool requires interval or GFF input (specially formatted tabular data). If your data is not TAB-delimited, first use *Text Manipulation->Convert*.
|
|
117
|
|
118 .. class:: warningmark
|
|
119
|
|
120 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
|
|
121
|
|
122 .. class:: warningmark
|
|
123
|
|
124 All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
|
|
125 - Any lines that do not contain at least 3 columns: a chromosome and numerical start and end coordinates.
|
|
126 - Sequences that fall outside of the range of a line's start and end coordinates.
|
|
127 - Chromosome, start or end coordinates that are invalid for the specified build.
|
|
128 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
|
|
129
|
|
130 .. class:: infomark
|
|
131
|
|
132 **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** was previously achieved by two separate tools; this tool handles both.
|
|
133
|
|
134 -----
|
|
135
|
|
136 **What it does**
|
|
137
|
|
138 This tool uses coordinate, strand, and build information to fetch genomic DNA sequences in FASTA or interval format.
|
|
139
|
|
140 If strand is not defined, the default value is "+".
|
|
141
|
|
142 -----
|
|
143
|
|
144 **Example**
|
|
145
|
|
146 If the input dataset is::
|
|
147
|
|
148 chr7 127475281 127475310 NM_000230 0 +
|
|
149 chr7 127485994 127486166 NM_000230 0 +
|
|
150 chr7 127486011 127486166 D49487 0 +
|
|
151
|
|
152 Extracting sequences with **FASTA** output data type returns::
|
|
153
|
|
154 >hg17_chr7_127475281_127475310_+
|
|
155 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
|
|
156 >hg17_chr7_127485994_127486166_+
|
|
157 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
|
|
158 GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
|
|
159 CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
|
|
160 GATCAATGACATTTCACACACG
|
|
161 >hg17_chr7_127486011_127486166_+
|
|
162 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
|
|
163 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
|
|
164 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
|
|
165 ACACG
|
|
166
|
|
167 Extracting sequences with **Interval** output data type returns::
|
|
168
|
|
169 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
|
|
170 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
|
|
171 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
|
|
172
|
|
173 </help>
|
|
174 </tool>
|