Mercurial > repos > ktnyt > gembassy
comparison GEMBASSY-1.0.3/src/genret.c @ 0:8300eb051bea draft
Initial upload
author | ktnyt |
---|---|
date | Fri, 26 Jun 2015 05:19:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8300eb051bea |
---|---|
1 /****************************************************************************** | |
2 ** @source genret | |
3 ** | |
4 ** Retrieves various gene related information from genome flatfile | |
5 ** | |
6 ** @author Copyright (C) 2012 Hidetoshi Itaya | |
7 ** @version 1.0.3 | |
8 ** @modified 2012/1/20 Hidetoshi Itaya Created! | |
9 ** @modified 2013/6/16 Revision 1 | |
10 ** @modified 2015/2/7 Refactor | |
11 ** @@ | |
12 ** | |
13 ** This program is free software; you can redistribute it and/or | |
14 ** modify it under the terms of the GNU General Public License | |
15 ** as published by the Free Software Foundation; either version 2 | |
16 ** of the License, or (at your option) any later version. | |
17 ** | |
18 ** This program is distributed in the hope that it will be useful, | |
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 ** GNU General Public License for more details. | |
22 ** | |
23 ** You should have received a copy of the GNU General Public License | |
24 ** along with this program; if not, write to the Free Software | |
25 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
26 ******************************************************************************/ | |
27 | |
28 #include "emboss.h" | |
29 #include "glibs.h" | |
30 | |
31 | |
32 | |
33 | |
34 /* @prog genret *************************************************************** | |
35 ** | |
36 ** Retrieves various gene related information from genome flatfile | |
37 ** | |
38 ******************************************************************************/ | |
39 | |
40 int main(int argc, char *argv[]) | |
41 { | |
42 embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); | |
43 | |
44 AjPSeqall seqall; | |
45 AjPSeq seq = NULL; | |
46 AjPStr inseq = NULL; | |
47 AjPStr gene = NULL; | |
48 AjPStr access = NULL; | |
49 AjBool accid = ajTrue; | |
50 AjPStr argument = NULL; | |
51 AjPFile outfile = NULL; | |
52 | |
53 AjPStr seqid = NULL; | |
54 AjPStr restid = NULL; | |
55 | |
56 AjBool valid = ajFalse; | |
57 AjBool isseq = ajFalse; | |
58 AjBool isgbk = ajFalse; | |
59 | |
60 AjPFilebuff buff = NULL; | |
61 AjPFile tmpfile = NULL; | |
62 AjPStr tmpname = NULL; | |
63 | |
64 AjPStr regexstr = NULL; | |
65 AjPStrTok token = NULL; | |
66 AjPRegexp regex = NULL; | |
67 | |
68 AjPStr url = NULL; | |
69 AjPStr base = NULL; | |
70 AjPStr head = NULL; | |
71 AjPStr line = NULL; | |
72 | |
73 seqall = ajAcdGetSeqall("sequence"); | |
74 access = ajAcdGetString("access"); | |
75 gene = ajAcdGetString("gene"); | |
76 argument = ajAcdGetString("argument"); | |
77 accid = ajAcdGetBoolean("accid"); | |
78 outfile = ajAcdGetOutfile("outfile"); | |
79 | |
80 if( | |
81 ajStrMatchC(access, "translation") || | |
82 ajStrMatchC(access, "get_exon") || | |
83 ajStrMatchC(access, "get_exons") || | |
84 ajStrMatchC(access, "get_cdsseq") || | |
85 ajStrMatchC(access, "get_gbkseq") || | |
86 ajStrMatchC(access, "get_geneseq") || | |
87 ajStrMatchC(access, "get_intron") || | |
88 ajStrMatchC(access, "getseq") || | |
89 ajStrMatchC(access, "seq") || | |
90 ajStrMatchC(access, "around_startcodon") || | |
91 ajStrMatchC(access, "around_stopcodon") || | |
92 ajStrMatchC(access, "before_startcodon") || | |
93 ajStrMatchC(access, "before_stopcodon") || | |
94 ajStrMatchC(access, "after_startcodon") || | |
95 ajStrMatchC(access, "after_stopcodon") | |
96 ) | |
97 { | |
98 isseq = ajTrue; | |
99 } | |
100 else if(ajStrMatchC(access, "annotate") || | |
101 ajStrMatchC(access, "output")) | |
102 { | |
103 isgbk = ajTrue; | |
104 } | |
105 else | |
106 { | |
107 ajFmtPrintF(outfile, "gene,%S\n", access); | |
108 } | |
109 | |
110 base = ajStrNewC("rest.g-language.org"); | |
111 | |
112 ajStrExchangeCC(&argument, " ", "/"); | |
113 ajStrExchangeCC(&argument, ",", "/"); | |
114 ajStrExchangeCC(&argument, "\t", "/"); | |
115 ajStrExchangeCC(&argument, "\r", "/"); | |
116 ajStrExchangeCC(&argument, "\n", "/"); | |
117 | |
118 if(ajStrMatchC(gene, "*")) | |
119 { | |
120 ajStrInsertK(&gene, 0, '.'); | |
121 } | |
122 | |
123 if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) | |
124 { | |
125 ajStrExchangeCC(&gene, "@", ""); | |
126 ajStrExchangeCC(&gene, "list::", ""); | |
127 ajStrAssignS(&tmpname, gene); | |
128 | |
129 tmpfile = ajFileNewInNameS(tmpname); | |
130 | |
131 if(!tmpfile) | |
132 { | |
133 ajDie("List file (%S) open error\n", tmpname); | |
134 } | |
135 | |
136 gene = ajStrNew(); | |
137 | |
138 while(ajReadline(tmpfile, &line)) | |
139 { | |
140 ajStrAppendS(&gene, line); | |
141 } | |
142 | |
143 ajFileClose(&tmpfile); | |
144 ajStrDel(&tmpname); | |
145 ajStrDel(&line); | |
146 } | |
147 | |
148 tmpname = ajStrNew(); | |
149 gAssignUniqueName(&tmpname); | |
150 | |
151 while(ajSeqallNext(seqall, &seq)) | |
152 { | |
153 inseq = ajStrNew(); | |
154 | |
155 if(!accid) | |
156 { | |
157 if(gFormatGenbank(seq, &inseq)) | |
158 { | |
159 tmpfile = ajFileNewOutNameS(tmpname); | |
160 | |
161 if(!tmpfile) | |
162 { | |
163 ajDie("Output file (%S) open error\n", tmpname); | |
164 } | |
165 | |
166 ajFmtPrintF(tmpfile, "%S", inseq); | |
167 | |
168 ajFileClose(&tmpfile); | |
169 | |
170 ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); | |
171 | |
172 gFilePostSS(url, tmpname, &restid); | |
173 | |
174 ajStrDel(&url); | |
175 | |
176 ajSysFileUnlinkS(tmpname); | |
177 } | |
178 else | |
179 { | |
180 ajWarn("Sequence does not have features\n" | |
181 "Proceeding with sequence accession ID\n"); | |
182 accid = ajTrue; | |
183 } | |
184 } | |
185 | |
186 | |
187 ajStrAssignS(&seqid, ajSeqGetAccS(seq)); | |
188 | |
189 if(ajStrGetLen(seqid) == 0) | |
190 { | |
191 ajStrAssignS(&seqid, ajSeqGetNameS(seq)); | |
192 } | |
193 | |
194 if(ajStrGetLen(seqid) == 0) | |
195 { | |
196 ajWarn("No valid header information\n"); | |
197 } | |
198 | |
199 if(accid) | |
200 { | |
201 ajStrAssignS(&restid, seqid); | |
202 if(ajStrGetLen(seqid) == 0) | |
203 { | |
204 ajDie("Cannot proceed without header with -accid\n"); | |
205 } | |
206 | |
207 if(!gValID(seqid)) | |
208 { | |
209 ajDie("Invalid accession ID:%S, exiting\n", seqid); | |
210 } | |
211 } | |
212 | |
213 url = ajStrNew(); | |
214 | |
215 if(isgbk) | |
216 { | |
217 ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); | |
218 } | |
219 else | |
220 { | |
221 ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); | |
222 } | |
223 | |
224 if(!gFilebuffURLS(url, &buff)) | |
225 { | |
226 ajDie("GET error from %S\n", url); | |
227 } | |
228 | |
229 while(ajBuffreadLine(buff, &line)) | |
230 { | |
231 if(isgbk){ | |
232 ajFmtPrintF(outfile, "%S", line); | |
233 continue; | |
234 } | |
235 | |
236 ajStrRemoveLastNewline(&line); | |
237 | |
238 regex = ajRegCompC("^>"); | |
239 | |
240 if(ajRegExec(regex, line)) | |
241 { | |
242 head = ajStrNew(); | |
243 | |
244 ajStrAssignS(&head, line); | |
245 ajStrTrimStartC(&head, ">"); | |
246 | |
247 valid = ajFalse; | |
248 | |
249 token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); | |
250 | |
251 while(ajStrTokenNextParse(token, ®exstr)) | |
252 { | |
253 if(ajStrGetLen(regexstr)) | |
254 { | |
255 regex = ajRegComp(regexstr); | |
256 | |
257 if(ajRegExec(regex, line)) | |
258 { | |
259 valid = ajTrue; | |
260 if(ajStrIsAlnum(regexstr)) | |
261 { | |
262 ajStrExchangeSC(&gene, regexstr, ""); | |
263 } | |
264 } | |
265 | |
266 ajRegFree(®ex); | |
267 } | |
268 } | |
269 } | |
270 else | |
271 { | |
272 if(valid) | |
273 { | |
274 if(isseq) | |
275 { | |
276 ajStrFmtWrap(&line, 60); | |
277 ajFmtPrintF(outfile, ">%S\n%S\n", head, line); | |
278 } | |
279 else | |
280 { | |
281 ajFmtPrintF(outfile, "%S,%S\n", head, line); | |
282 } | |
283 | |
284 valid = ajFalse; | |
285 } | |
286 } | |
287 } | |
288 | |
289 ajFileClose(&outfile); | |
290 | |
291 ajStrDel(&restid); | |
292 ajStrDel(&seqid); | |
293 ajStrDel(&inseq); | |
294 } | |
295 | |
296 ajSeqallDel(&seqall); | |
297 ajSeqDel(&seq); | |
298 ajStrDel(&access); | |
299 ajStrDel(&gene); | |
300 | |
301 embExit(); | |
302 } |