annotate tools/extract/extract_genomic_dna.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.2">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>using coordinates from assembled/unassembled genomes</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 extract_genomic_dna.py $input $out_file1 -o $out_format -d $dbkey
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 #if str( $interpret_features ) == "yes":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 -I
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 ## Columns to use in input file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 -1 1,4,5,7 --gff
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 #else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 #if $seq_source.index_source == "cached":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 ## Genomic data from cache.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 -g ${GALAXY_DATA_INDEX_DIR}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 #else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 ## Genomic data from history.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 -F $seq_source.ref_file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <param format="interval,gff" name="input" type="data" label="Fetch sequences for intervals in"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <param name="interpret_features" type="select" label="Interpret features when possible" help="Only meaningful for GFF, GTF datasets.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 <option value="yes">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 <option value="no">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 <conditional name="seq_source">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <param name="index_source" type="select" label="Source for Genomic Data">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <option value="cached">Locally cached</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <option value="history">History</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 <when value="cached">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <when value="history">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <param name="out_format" type="select" label="Output data type">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <option value="fasta">FASTA</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <option value="interval">Interval</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 <data format="input" name="out_file1" metadata_source="input">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 <change_format>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <when input="out_format" value="fasta" format="fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 </change_format>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 </data>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 <requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 <requirement type="binary">faToTwoBit</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 </requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <param name="interpret_features" value="yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 <param name="index_source" value="cached"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 <param name="out_format" value="fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 <output name="out_file1" file="extract_genomic_dna_out1.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 <param name="interpret_features" value="yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 <param name="index_source" value="cached"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 <param name="out_format" value="fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 <output name="out_file1" file="extract_genomic_dna_out2.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 <param name="interpret_features" value="yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 <param name="index_source" value="cached"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 <param name="out_format" value="interval"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 <output name="out_file1" file="extract_genomic_dna_out3.interval" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 <!-- Test GFF file support. -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <param name="interpret_features" value="no"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <param name="index_source" value="cached"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <param name="out_format" value="interval"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 <output name="out_file1" file="extract_genomic_dna_out4.gff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 <param name="interpret_features" value="no"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 <param name="out_format" value="fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 <param name="index_source" value="cached"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 <output name="out_file1" file="extract_genomic_dna_out5.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 <!-- Test custom sequences support and GFF feature interpretation. -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 <param name="interpret_features" value="no"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 <param name="index_source" value="history"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 <param name="ref_file" value="tophat_in1.fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 <param name="out_format" value="fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 <output name="out_file1" file="extract_genomic_dna_out6.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 <param name="interpret_features" value="yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 <param name="index_source" value="history"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 <param name="ref_file" value="tophat_in1.fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 <param name="out_format" value="fasta"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 <output name="out_file1" file="extract_genomic_dna_out7.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 This tool requires interval or GFF data (specially formatted tabular data). If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 - Any lines that do not contain at least 3 columns: a chromosome, and numerical start and end coordinates.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 - Sequences that fall outside of the range of a line's start and end coordinates.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 - Chromosome, start or end coordinates that are invalid for the specified build.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 - Any lines whose data columns are not separated by a **TAB** character (other white-space characters are invalid).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** were previously provided as two separate tools; this tool handles both.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 This tool uses coordinate, strand, and build information to fetch genomic DNA sequences in FASTA or interval format.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 If strand is not defined, the default value is "+".
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 If the input dataset is::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 chr7 127475281 127475310 NM_000230 0 +
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 chr7 127485994 127486166 NM_000230 0 +
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 chr7 127486011 127486166 D49487 0 +
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 Extracting sequences with **FASTA** output data type returns::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 &gt;hg17_chr7_127475281_127475310_+
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 &gt;hg17_chr7_127485994_127486166_+
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 GATCAATGACATTTCACACACG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 &gt;hg17_chr7_127486011_127486166_+
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 ACACG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 Extracting sequences with **Interval** output data type returns::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 </tool>