Mercurial > repos > iuc > ncbi_eutils_ecitmatch
comparison __efetch_build_options.py @ 0:68cd8d564e0a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
author | iuc |
---|---|
date | Thu, 07 Jul 2016 02:39:21 -0400 |
parents | |
children | 732a52c18758 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:68cd8d564e0a |
---|---|
1 #!/usr/bin/env python | |
2 # Daniel Blankenberg | |
3 # Creates the options for tool interface | |
4 import re | |
5 | |
6 # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi | |
# Raw <DbName> elements, one per line.  The literal opens with a newline, so
# the resulting list starts with an empty string.
_DB_NAME_XML = '''
<DbName>annotinfo</DbName>
<DbName>assembly</DbName>
<DbName>bioproject</DbName>
<DbName>biosample</DbName>
<DbName>biosystems</DbName>
<DbName>blastdbinfo</DbName>
<DbName>books</DbName>
<DbName>cdd</DbName>
<DbName>clinvar</DbName>
<DbName>clone</DbName>
<DbName>dbvar</DbName>
<DbName>gap</DbName>
<DbName>gapplus</DbName>
<DbName>gds</DbName>
<DbName>gencoll</DbName>
<DbName>gene</DbName>
<DbName>genome</DbName>
<DbName>geoprofiles</DbName>
<DbName>grasp</DbName>
<DbName>gtr</DbName>
<DbName>homologene</DbName>
<DbName>medgen</DbName>
<DbName>mesh</DbName>
<DbName>ncbisearch</DbName>
<DbName>nlmcatalog</DbName>
<DbName>nuccore</DbName>
<DbName>nucest</DbName>
<DbName>nucgss</DbName>
<DbName>nucleotide</DbName>
<DbName>omim</DbName>
<DbName>orgtrack</DbName>
<DbName>pcassay</DbName>
<DbName>pccompound</DbName>
<DbName>pcsubstance</DbName>
<DbName>pmc</DbName>
<DbName>popset</DbName>
<DbName>probe</DbName>
<DbName>protein</DbName>
<DbName>proteinclusters</DbName>
<DbName>pubmed</DbName>
<DbName>pubmedhealth</DbName>
<DbName>seqannot</DbName>
<DbName>snp</DbName>
<DbName>sra</DbName>
<DbName>structure</DbName>
<DbName>taxonomy</DbName>
<DbName>unigene</DbName>'''

# Drop the opening and closing tags, then break the text into one name per line.
_db_names_text = _DB_NAME_XML
for _tag in ("<DbName>", "</DbName>"):
    _db_names_text = _db_names_text.replace(_tag, "")
db_list = _db_names_text.split("\n")
55 | |
56 | |
# Table of output formats, grouped by database.
#
# Layout, as consumed by the parsing loop that follows:
#   * a line holding a single item starts a new section (a database name, or
#     the pseudo-sections "(all)" and "(sequences)");
#   * a line with two items is   <format>  <description>;
#   * a line with three items is <format>  <modifier>  <description>.
# Columns MUST be separated by at least two whitespace characters, because the
# parser splits on runs of two or more; single spaces belong to a description.
#
# NOTE: the name `help` shadows the builtin; it is kept because the parsing
# code reads this exact name.
help = '''(all)
    docsum            xml      Document Summary
    docsum            json     Document Summary
    full              text     Full Document
    uilist            xml      Unique Identifier List
    uilist            text     Unique Identifier List
    full              xml      Full Document

bioproject
    native                     BioProject Report
    native            xml      RecordSet

biosample
    native                     BioSample Report
    native            xml      BioSampleSet

biosystems
    native            xml      Sys-set

gds
    native            xml      RecordSet
    summary           text     Summary

gene
    gene_table        xml      Gene Table
    native            text     Gene Report
    native            asn.1    Entrezgene
    native            xml      Entrezgene-Set
    tabular           tabular  Tabular Report

homologene
    alignmentscores   text     Alignment Scores
    fasta             fasta    FASTA
    homologene        text     Homologene Report
    native            text     Homologene List
    native            asn.1    HG-Entry
    native            xml      Entrez-Homologene-Set

mesh
    full              text     Full Record
    native            text     MeSH Report
    native            xml      RecordSet

nlmcatalog
    native            text     Full Record
    native            xml      NLMCatalogRecordSet

pmc
    medline           text     MEDLINE
    native            xml      pmc-articleset

pubmed
    abstract          xml      Abstract
    medline           text     MEDLINE
    native            asn.1    Pubmed-entry
    native            xml      PubmedArticleSet

(sequences)
    acc               text     Accession Number
    est               xml      EST Report
    fasta             fasta    FASTA
    fasta             xml      TinySeq
    fasta_cds_aa      fasta    CDS Products
    fasta_cds_na      fasta    Coding Regions
    ft                text     Feature Table
    gb                text     GenBank Flatfile
    gb                xml      GBSet
    gbc               xml      INSDSet
    gbwithparts       text     GenBank with Contig Sequences
    gene_fasta        fasta    FASTA of Gene
    gp                text     GenPept Flatfile
    gp                xml      GBSet
    gpc               xml      INSDSet
    gss               text     GSS Report
    ipg               text     Identical Protein Report
    ipg               xml      IPGReportSet
    native            text     Seq-entry
    native            xml      Bioseq-set
    seqid             asn.1    Seq-id

snp
    chr               text     Chromosome Report
    docset            text     Summary
    fasta             fasta    FASTA
    flt               text     Flat File
    native            asn.1    Rs
    native            xml      ExchangeSet
    rsr               tabular  RS Cluster Report
    ssexemplar        text     SS Exemplar List

sra
    native            xml      EXPERIMENT_PACKAGE_SET
    runinfo           xml      SraRunInfo

structure
    mmdb              asn.1    Ncbi-mime-asn1 strucseq
    native            text     MMDB Report
    native            xml      RecordSet

taxonomy
    native            text     Taxonomy List
    native            xml      TaxaSet'''.split("\n")
159 | |
160 | |
# Map every known database name to its list of
# (format, modifier, description) tuples.  Every database starts out empty and
# is filled from the format table parsed below.
db = {}
for db_name in db_list:
    db[db_name] = []

# Columns of the format table are separated by two or more whitespace
# characters; single spaces are part of a description.
_COLUMN_SPLIT = re.compile(r'\s{2,}')

section = None
for line in help:
    line = line.strip()
    # Blank lines only separate sections.  (re.split never returns an empty
    # list, so emptiness has to be tested on the text itself.)
    if not line:
        continue

    fields = _COLUMN_SPLIT.split(line)
    if len(fields) == 1:
        # A lone item is a section header: start (or restart) that section.
        section = fields[0]
        db[section] = []
    elif len(fields) == 2:
        # <format>  <description> -- the format has no modifier.
        parent_format, description = fields
        db[section].append((parent_format, None, description))
    elif len(fields) == 3:
        # <format>  <modifier>  <description>
        parent_format, format_modifier, description = fields
        db[section].append((parent_format, format_modifier, description))
    # Lines with more than three columns are ignored.


# "(all)" and "(sequences)" are pseudo-sections, not databases: lift them out
# of the mapping and merge them into the real databases below.
all_formats = db.pop('(all)')
sequences_formats = db.pop('(sequences)')
# The database name list begins with an empty entry (its literal opens with a
# newline); discard it if present.
db.pop('', None)

# Formats from "(all)" are appended to every database.
for key in db:
    db[key] += all_formats

# Formats from "(sequences)" are appended to these four databases only.
for key in ('nuccore', 'nucest', 'nucgss', 'nucleotide'):
    db[key] += sequences_formats
202 | |
# Template holding only blank lines; nothing in the visible part of this
# script formats or prints it.
MACRO_TPL = '''

'''

# One <when> branch of the "db" conditional.  {format} is filled with the
# database name and {format_options} with the joined <option> elements
# generated for that database.
WHEN_TPL = ''' <when value="{format}">
<param name="output_format" type="select" label="Output Format">
{format_options}
</param>
</when>'''

# A single <option> element: {name_type} becomes the option's value and
# {name_type_human} the text shown for it.
FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>'''
214 | |
# Maps every generated option value ("<format>-<modifier>") to its modifier,
# which is None for formats that have no modifier.  Filled while the options
# are rendered and read again when the <change_format> section is generated.
format_names = {}


def _format_sort_key(entry):
    """Return a sort key for a (format, modifier, description) tuple.

    A missing modifier (None) is mapped to the empty string so entries can be
    ordered on Python 3, where None and str do not compare.  Modifiers parsed
    from the table are never empty, so this yields the same order Python 2
    produced when comparing the raw tuples (None before any string).
    """
    parent_format, format_modifier, description = entry
    return (parent_format, format_modifier or '', description)


# Single-argument print(...) is valid on both Python 2 and Python 3.
print(''' <xml name="db">
<conditional name="db">
<expand macro="dbselect" />''')
for key in sorted(db):
    format_options = []

    for (parent_format, format_modifier, description) in sorted(db[key], key=_format_sort_key):
        # Label shown to the user: the description, plus the modifier if any.
        name_human = description
        if format_modifier:
            name_human += ' (%s)' % format_modifier
        # The option value always carries both parts, so a format without a
        # modifier yields "<format>-None".
        format_string = '%s-%s' % (parent_format, format_modifier)

        format_options.append(FORMAT_OPTION_TPL.format(
            name_type=format_string,
            name_type_human=name_human,
        ))

        format_names[format_string] = format_modifier

    print(WHEN_TPL.format(
        format=key,
        format_options='\n '.join(format_options)
    ))

print(''' </conditional>
</xml>''')
243 | |
# Wrapper for the generated <change_format> section; {formats} receives the
# joined <when> elements.
CHANGE_FORMAT_TPL = '''
<xml name="efetch_formats">
<change_format>
{formats}
</change_format>
</xml>
'''

# One <when> rule: {key} is an output_format option value and {value} the
# format name written into the rule's format attribute.
CHANGE_FORMAT_WHEN_TPL = '''<when input="output_format" value="{key}" format="{value}"/>'''
# Format options


whens = []
# Iterate in sorted key order so the generated XML is identical from run to
# run instead of following dict iteration order.
for k in sorted(format_names):
    v = format_names[k]
    if v is None:
        # Formats without a modifier are written as "text".
        v = 'text'
    elif v == 'asn.1':
        # The modifier "asn.1" is written without the dot.
        v = 'asn1'

    whens.append(CHANGE_FORMAT_WHEN_TPL.format(
        key=k, value=v
    ))

# Single-argument print(...) is valid on both Python 2 and Python 3.
print(CHANGE_FORMAT_TPL.format(formats='\n '.join(whens)))