Mercurial > repos > bitlab > gecko
view gecko/src/reverseComplement.c @ 11:cf4c0c822ca9 draft
Uploaded
author | bitlab |
---|---|
date | Wed, 18 Nov 2020 08:37:31 +0000 |
parents | aec70bb1ae27 |
children |
line wrap: on
line source
/* reverseComplement.c Program reading a FASTA file as input (one or multiple sequence) and then returning the reverse complementary sequence in the output file. It is important to note that for multiple sequence file the first input sequence is the last one at the output file and the last one in the input is the first one in the output. oscart@uma.es */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <inttypes.h> #include "commonFunctions.h" #define SEQSIZE 2000000000 #define WSIZE 32 #define NREADS 1000000 int main(int ac, char** av) { FILE *fIn, *fOut; int64_t i, j, nR, seqLen = 0; char *seq, c, toW; long offset[NREADS]; if (ac != 3) terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT"); /** * Allocating memory for the sequence * Fixed amount of memory, change it to loadSeqDB? */ if ((seq = (char*) malloc(SEQSIZE)) == NULL) terror("memory for Seq"); if ((fIn = fopen(av[1], "rt")) == NULL) terror("opening IN sequence FASTA file"); if ((fOut = fopen(av[2], "wt")) == NULL) terror("opening OUT sequence Words file"); for(i=0;i<NREADS;i++){ offset[i]=0; } nR = 0; c = fgetc(fIn); while(!feof(fIn)){ if(c == '>'){ offset[nR++] = ftell(fIn)-1; } c = fgetc(fIn); } for(i=nR-1; i>=0; i--){ fseek(fIn, offset[i], SEEK_SET); //READ and write header if(fgets(seq, SEQSIZE, fIn)==NULL){ terror("Empty file"); } fprintf(fOut, "%s", seq); //READ and write sequence c = fgetc(fIn); while(c != '>' && !feof(fIn)){ if(isupper(c) || islower(c)){ seq[seqLen++]=c; } c = fgetc(fIn); } for(j=seqLen-1; j >= 0; j--){ switch(seq[j]){ case 'A': toW = 'T'; break; case 'C': toW = 'G'; break; case 'G': toW = 'C'; break; case 'T': toW = 'A'; break; case 'U': toW = 'A'; break; case 'a': toW = 't'; break; case 'c': toW = 'g'; break; case 'g': toW = 'c'; break; case 't': toW = 'a'; break; case 'u': toW = 'a'; break; default: toW = seq[j]; break; } fwrite(&toW, sizeof(char), 1, fOut); } toW='\n'; fwrite(&toW, sizeof(char), 1, fOut); seqLen=0; } fclose(fIn); fclose(fOut); return 0; }