comparison __efetch_build_options.py @ 0:68cd8d564e0a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
author iuc
date Thu, 07 Jul 2016 02:39:21 -0400
parents
children 732a52c18758
comparison
equal deleted inserted replaced
-1:000000000000 0:68cd8d564e0a
1 #!/usr/bin/env python
2 # Daniel Blankenberg
3 # Creates the options for tool interface
4 import re
5
# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
# Database names, kept as <DbName> elements (presumably pasted from the einfo
# response above -- confirm when refreshing).  Extracting the element text with
# a regex avoids the empty first entry that splitting the literal on "\n"
# used to produce.
db_list = re.findall(r'<DbName>([^<]*)</DbName>', '''
<DbName>annotinfo</DbName>
<DbName>assembly</DbName>
<DbName>bioproject</DbName>
<DbName>biosample</DbName>
<DbName>biosystems</DbName>
<DbName>blastdbinfo</DbName>
<DbName>books</DbName>
<DbName>cdd</DbName>
<DbName>clinvar</DbName>
<DbName>clone</DbName>
<DbName>dbvar</DbName>
<DbName>gap</DbName>
<DbName>gapplus</DbName>
<DbName>gds</DbName>
<DbName>gencoll</DbName>
<DbName>gene</DbName>
<DbName>genome</DbName>
<DbName>geoprofiles</DbName>
<DbName>grasp</DbName>
<DbName>gtr</DbName>
<DbName>homologene</DbName>
<DbName>medgen</DbName>
<DbName>mesh</DbName>
<DbName>ncbisearch</DbName>
<DbName>nlmcatalog</DbName>
<DbName>nuccore</DbName>
<DbName>nucest</DbName>
<DbName>nucgss</DbName>
<DbName>nucleotide</DbName>
<DbName>omim</DbName>
<DbName>orgtrack</DbName>
<DbName>pcassay</DbName>
<DbName>pccompound</DbName>
<DbName>pcsubstance</DbName>
<DbName>pmc</DbName>
<DbName>popset</DbName>
<DbName>probe</DbName>
<DbName>protein</DbName>
<DbName>proteinclusters</DbName>
<DbName>pubmed</DbName>
<DbName>pubmedhealth</DbName>
<DbName>seqannot</DbName>
<DbName>snp</DbName>
<DbName>sra</DbName>
<DbName>structure</DbName>
<DbName>taxonomy</DbName>
<DbName>unigene</DbName>''')


# Table of output formats, one section per database.
#   * a line with a single column is a section header (a database name, or one
#     of the pseudo-sections "(all)" and "(sequences)");
#   * a line with two columns is   <format>  <description>;
#   * a line with three columns is <format>  <modifier>  <description>.
# Columns MUST be separated by at least two whitespace characters; single
# spaces are part of a column's text.
# NOTE: the name shadows the builtin help(); it is kept so existing references
# to this module-level name keep working.
help = '''
(all)
    docsum            xml       Document Summary
    docsum            json      Document Summary
    full              text      Full Document
    uilist            xml       Unique Identifier List
    uilist            text      Unique Identifier List
    full              xml       Full Document

bioproject
    native                      BioProject Report
    native            xml       RecordSet

biosample
    native                      BioSample Report
    native            xml       BioSampleSet

biosystems
    native            xml       Sys-set

gds
    native            xml       RecordSet
    summary           text      Summary

gene
    gene_table        xml       Gene Table
    native            text      Gene Report
    native            asn.1     Entrezgene
    native            xml       Entrezgene-Set
    tabular           tabular   Tabular Report

homologene
    alignmentscores   text      Alignment Scores
    fasta             fasta     FASTA
    homologene        text      Homologene Report
    native            text      Homologene List
    native            asn.1     HG-Entry
    native            xml       Entrez-Homologene-Set

mesh
    full              text      Full Record
    native            text      MeSH Report
    native            xml       RecordSet

nlmcatalog
    native            text      Full Record
    native            xml       NLMCatalogRecordSet

pmc
    medline           text      MEDLINE
    native            xml       pmc-articleset

pubmed
    abstract          xml       Abstract
    medline           text      MEDLINE
    native            asn.1     Pubmed-entry
    native            xml       PubmedArticleSet

(sequences)
    acc               text      Accession Number
    est               xml       EST Report
    fasta             fasta     FASTA
    fasta             xml       TinySeq
    fasta_cds_aa      fasta     CDS Products
    fasta_cds_na      fasta     Coding Regions
    ft                text      Feature Table
    gb                text      GenBank Flatfile
    gb                xml       GBSet
    gbc               xml       INSDSet
    gbwithparts       text      GenBank with Contig Sequences
    gene_fasta        fasta     FASTA of Gene
    gp                text      GenPept Flatfile
    gp                xml       GBSet
    gpc               xml       INSDSet
    gss               text      GSS Report
    ipg               text      Identical Protein Report
    ipg               xml       IPGReportSet
    native            text      Seq-entry
    native            xml       Bioseq-set
    seqid             asn.1     Seq-id

snp
    chr               text      Chromosome Report
    docset            text      Summary
    fasta             fasta     FASTA
    flt               text      Flat File
    native            asn.1     Rs
    native            xml       ExchangeSet
    rsr               tabular   RS Cluster Report
    ssexemplar        text      SS Exemplar List

sra
    native            xml       EXPERIMENT_PACKAGE_SET
    runinfo           xml       SraRunInfo

structure
    mmdb              asn.1     Ncbi-mime-asn1 strucseq
    native            text      MMDB Report
    native            xml       RecordSet

taxonomy
    native            text      Taxonomy List
    native            xml       TaxaSet'''.split("\n")


# Two or more whitespace characters separate the columns of a table row.
_COLUMN_GAP = re.compile(r'\s{2,}')


def _parse_format_table(lines):
    """Parse the lines of the format table into ``{section: [entries]}``.

    Each entry is a ``(format, modifier, description)`` tuple; ``modifier`` is
    ``None`` for two-column rows.  Entries keep the order in which they appear.

    Blank lines are skipped explicitly: ``re.split`` on an empty string returns
    ``['']`` (one item), so a length check on the split result can never detect
    them and they would otherwise be read as a section named ``''``.

    Raises ``KeyError`` if a format row appears before any section header.
    """
    table = {}
    section = None
    for raw_line in lines:
        text = raw_line.strip()
        if not text:
            continue
        columns = _COLUMN_GAP.split(text)
        if len(columns) == 1:
            # Section header: start a fresh list (a repeated header resets it).
            section = columns[0]
            table[section] = []
        elif len(columns) == 2:
            table[section].append((columns[0], None, columns[1]))
        elif len(columns) == 3:
            table[section].append((columns[0], columns[1], columns[2]))
        # Rows with more than three columns are ignored, as before.
    return table


# Every known database starts with no formats of its own ...
db = {}
for db_name in db_list:
    db[db_name] = []

# ... then the databases that have a section in the table get their entries.
db.update(_parse_format_table(help))

# The two pseudo-sections are not databases; pull them out of the mapping.
all_formats = db.pop('(all)')
sequences_formats = db.pop('(sequences)')

# Formats from "(all)" are offered for every database.
for key in db:
    db[key].extend(all_formats)

# Formats from "(sequences)" are added to these four databases only.
for key in ('nuccore', 'nucest', 'nucgss', 'nucleotide'):
    db[key].extend(sequences_formats)
202
MACRO_TPL = '''

'''

# Opening and closing lines of the "db" macro.
DB_MACRO_HEADER = '''  <xml name="db">
    <conditional name="db">
      <expand macro="dbselect" />'''

DB_MACRO_FOOTER = '''    </conditional>
  </xml>'''

# One <when> per database; {format} receives the database name.
WHEN_TPL = '''      <when value="{format}">
        <param name="output_format" type="select" label="Output Format">
          {format_options}
        </param>
      </when>'''

FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>'''

CHANGE_FORMAT_TPL = '''
  <xml name="efetch_formats">
    <change_format>
      {formats}
    </change_format>
  </xml>
'''

CHANGE_FORMAT_WHEN_TPL = '''<when input="output_format" value="{key}" format="{value}"/>'''


def _format_sort_key(entry):
    """Sort key for a ``(format, modifier, description)`` entry.

    A missing modifier (``None``) is ordered as an empty string, so it sorts
    before any real modifier.  Sorting the raw tuples only works on Python 2;
    Python 3 refuses to compare ``None`` with ``str``.
    """
    parent_format, format_modifier, description = entry
    return (parent_format, format_modifier or '', description)


def build_db_macro(db):
    """Render the ``db`` macro and collect the option values it uses.

    ``db`` maps a database name to a list of
    ``(format, modifier, description)`` tuples.  Databases and their formats
    are emitted in sorted order.

    Returns ``(xml_text, format_names)`` where ``format_names`` maps every
    emitted option value (``"<format>-<modifier>"``) to its modifier.  A
    missing modifier appears as the literal text ``None`` in the option value
    and as ``None`` in the mapping.
    """
    format_names = {}
    chunks = [DB_MACRO_HEADER]
    for key in sorted(db):
        format_options = []
        for parent_format, format_modifier, description in sorted(db[key], key=_format_sort_key):
            name_human = description
            if format_modifier:
                name_human += ' (%s)' % format_modifier
            format_string = '%s-%s' % (parent_format, format_modifier)

            format_options.append(FORMAT_OPTION_TPL.format(
                name_type=format_string,
                name_type_human=name_human,
            ))
            format_names[format_string] = format_modifier

        chunks.append(WHEN_TPL.format(
            format=key,
            format_options='\n          '.join(format_options),
        ))
    chunks.append(DB_MACRO_FOOTER)
    return '\n'.join(chunks), format_names


def build_change_format_macro(format_names):
    """Render the ``efetch_formats`` macro from ``{option value: modifier}``.

    The ``format`` attribute is the modifier, except that a missing modifier
    becomes ``text`` and ``asn.1`` becomes ``asn1``.  Rows are emitted in
    sorted order of option value so the generated XML is reproducible.
    """
    whens = []
    for option_value in sorted(format_names):
        output_format = format_names[option_value]
        if output_format is None:
            output_format = 'text'
        elif output_format == 'asn.1':
            output_format = 'asn1'
        whens.append(CHANGE_FORMAT_WHEN_TPL.format(
            key=option_value, value=output_format
        ))
    return CHANGE_FORMAT_TPL.format(formats='\n      '.join(whens))


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3.
    db_macro, format_names = build_db_macro(db)
    print(db_macro)
    print(build_change_format_macro(format_names))