Mercurial > repos > ktnyt > gembassy
diff GEMBASSY-1.0.3/src/genret.c @ 0:8300eb051bea draft
Initial upload
author | ktnyt |
---|---|
date | Fri, 26 Jun 2015 05:19:29 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GEMBASSY-1.0.3/src/genret.c Fri Jun 26 05:19:29 2015 -0400 @@ -0,0 +1,302 @@ +/****************************************************************************** +** @source genret +** +** Retrieves various gene related infomration from genome flatfile +** +** @author Copyright (C) 2012 Hidetoshi Itaya +** @version 1.0.3 +** @modified 2012/1/20 Hidetoshi Itaya Created! +** @modified 2013/6/16 Revision 1 +** @modified 2015/2/7 Refactor +** @@ +** +** This program is free software; you can redistribute it and/or +** modify it under the terms of the GNU General Public License +** as published by the Free Software Foundation; either version 2 +** of the License, or (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +******************************************************************************/ + +#include "emboss.h" +#include "glibs.h" + + + + +/* @prog genret *************************************************************** +** +** Retrieves various gene related infomration from genome flatfile +** +******************************************************************************/ + +int main(int argc, char *argv[]) +{ + embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); + + AjPSeqall seqall; + AjPSeq seq = NULL; + AjPStr inseq = NULL; + AjPStr gene = NULL; + AjPStr access = NULL; + AjBool accid = ajTrue; + AjPStr argument = NULL; + AjPFile outfile = NULL; + + AjPStr seqid = NULL; + AjPStr restid = NULL; + + AjBool valid = ajFalse; + AjBool isseq = ajFalse; + AjBool isgbk = ajFalse; + + AjPFilebuff buff = NULL; + AjPFile tmpfile = NULL; + AjPStr tmpname = NULL; + + AjPStr regexstr = NULL; + AjPStrTok token = NULL; + AjPRegexp regex = NULL; + + AjPStr url = NULL; + AjPStr base = NULL; + AjPStr head = NULL; + AjPStr line = NULL; + + seqall = ajAcdGetSeqall("sequence"); + access = ajAcdGetString("access"); + gene = ajAcdGetString("gene"); + argument = ajAcdGetString("argument"); + accid = ajAcdGetBoolean("accid"); + outfile = ajAcdGetOutfile("outfile"); + + if( + ajStrMatchC(access, "translation") || + ajStrMatchC(access, "get_exon") || + ajStrMatchC(access, "get_exons") || + ajStrMatchC(access, "get_cdsseq") || + ajStrMatchC(access, "get_gbkseq") || + ajStrMatchC(access, "get_geneseq") || + ajStrMatchC(access, "get_intron") || + ajStrMatchC(access, "getseq") || + ajStrMatchC(access, "seq") || + ajStrMatchC(access, "around_startcodon") || + ajStrMatchC(access, "around_stopcodon") || + ajStrMatchC(access, "before_startcodon") || + ajStrMatchC(access, "before_stopcodon") || + ajStrMatchC(access, "after_startcodon") || + ajStrMatchC(access, "after_stopcodon") + ) + { + isseq = ajTrue; + } + else if(ajStrMatchC(access, "annotate") || + ajStrMatchC(access, "output")) + { + isgbk = ajTrue; + } + else + { + ajFmtPrintF(outfile, "gene,%S\n", access); + } + + base = ajStrNewC("rest.g-language.org"); + + ajStrExchangeCC(&argument, " ", "/"); + ajStrExchangeCC(&argument, ",", "/"); + ajStrExchangeCC(&argument, "\t", "/"); + ajStrExchangeCC(&argument, "\r", "/"); + ajStrExchangeCC(&argument, "\n", "/"); + + if(ajStrMatchC(gene, "*")) + { + ajStrInsertK(&gene, 0, '.'); + } + + if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) + { + ajStrExchangeCC(&gene, "@", ""); + ajStrExchangeCC(&gene, "list::", ""); + ajStrAssignS(&tmpname, gene); + + tmpfile = ajFileNewInNameS(tmpname); + + if(!tmpfile) + { + ajDie("List file (%S) open error\n", tmpname); + } + + gene = ajStrNew(); + + while(ajReadline(tmpfile, &line)) + { + ajStrAppendS(&gene, line); + } + + ajFileClose(&tmpfile); + ajStrDel(&tmpname); + ajStrDel(&line); + } + + tmpname = ajStrNew(); + gAssignUniqueName(&tmpname); + + while(ajSeqallNext(seqall, &seq)) + { + inseq = ajStrNew(); + + if(!accid) + { + if(gFormatGenbank(seq, &inseq)) + { + tmpfile = ajFileNewOutNameS(tmpname); + + if(!tmpfile) + { + ajDie("Output file (%S) open error\n", tmpname); + } + + ajFmtPrintF(tmpfile, "%S", inseq); + + ajFileClose(&tmpfile); + + ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); + + gFilePostSS(url, tmpname, &restid); + + ajStrDel(&url); + + ajSysFileUnlinkS(tmpname); + } + else + { + ajWarn("Sequence does not have features\n" + "Proceeding with sequence accession ID\n"); + accid = ajTrue; + } + } + + + ajStrAssignS(&seqid, ajSeqGetAccS(seq)); + + if(ajStrGetLen(seqid) == 0) + { + ajStrAssignS(&seqid, ajSeqGetNameS(seq)); + } + + if(ajStrGetLen(seqid) == 0) + { + ajWarn("No valid header information\n"); + } + + if(accid) + { + ajStrAssignS(&restid, seqid); + if(ajStrGetLen(seqid) == 0) + { + ajDie("Cannot proceed without header with -accid\n"); + } + + if(!gValID(seqid)) + { + ajDie("Invalid accession ID:%S, exiting\n", seqid); + } + } + + url = ajStrNew(); + + if(isgbk) + { + ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); + } + else + { + ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); + } + + if(!gFilebuffURLS(url, &buff)) + { + ajDie("GET error from %S\n", url); + } + + while(ajBuffreadLine(buff, &line)) + { + if(isgbk){ + ajFmtPrintF(outfile, "%S", line); + continue; + } + + ajStrRemoveLastNewline(&line); + + regex = ajRegCompC("^>"); + + if(ajRegExec(regex, line)) + { + head = ajStrNew(); + + ajStrAssignS(&head, line); + ajStrTrimStartC(&head, ">"); + + valid = ajFalse; + + token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); + + while(ajStrTokenNextParse(token, ®exstr)) + { + if(ajStrGetLen(regexstr)) + { + regex = ajRegComp(regexstr); + + if(ajRegExec(regex, line)) + { + valid = ajTrue; + if(ajStrIsAlnum(regexstr)) + { + ajStrExchangeSC(&gene, regexstr, ""); + } + } + + ajRegFree(®ex); + } + } + } + else + { + if(valid) + { + if(isseq) + { + ajStrFmtWrap(&line, 60); + ajFmtPrintF(outfile, ">%S\n%S\n", head, line); + } + else + { + ajFmtPrintF(outfile, "%S,%S\n", head, line); + } + + valid = ajFalse; + } + } + } + + ajFileClose(&outfile); + + ajStrDel(&restid); + ajStrDel(&seqid); + ajStrDel(&inseq); + } + + ajSeqallDel(&seqall); + ajSeqDel(&seq); + ajStrDel(&access); + ajStrDel(&gene); + + embExit(); +}