Mercurial > repos > bitlab > imsame
comparison IMSAME/src/reverseComplement.c @ 0:762009a91895 draft
Uploaded
| author | bitlab | 
|---|---|
| date | Sat, 15 Dec 2018 18:04:10 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:762009a91895 | 
|---|---|
| 1 /* reverseComplement.c | |
| 2 Program reading a FASTA file as input (one or multiple sequence) | |
| 3 and then returning the reverse complementary sequence in the output file. | |
| 4 It is important to note that for multiple sequence file the first input | |
| 5 sequence is the last one at the output file and the last one in the input | |
| 6 is the first one in the output. | |
| 7 oscart@uma.es | |
| 8 */ | |
| 9 | |
| 10 #include <stdio.h> | |
| 11 #include <stdlib.h> | |
| 12 #include <string.h> | |
| 13 #include <ctype.h> | |
| 14 #include <inttypes.h> | |
| 15 #include "commonFunctions.h" | |
| 16 | |
| 17 #define SEQSIZE 2000000000 | |
| 18 #define READINPUT 10000 | |
| 19 #define WSIZE 32 | |
| 20 #define NREADS 1000000 | |
| 21 | |
| 22 int main(int ac, char** av) { | |
| 23 FILE *fIn, *fOut; | |
| 24 int64_t i, j, nR, seqLen = 0; | |
| 25 char *seq, c, toW; | |
| 26 long *offset = NULL; | |
| 27 | |
| 28 if (ac != 3) | |
| 29 terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT"); | |
| 30 | |
| 31 /** | |
| 32 * Allocating memory for the sequence | |
| 33 * Fixed amount of memory, change it to loadSeqDB? | |
| 34 */ | |
| 35 if ((seq = (char*) malloc(SEQSIZE)) == NULL) | |
| 36 terror("memory for Seq"); | |
| 37 | |
| 38 if ((fIn = fopen(av[1], "rt")) == NULL) | |
| 39 terror("opening IN sequence FASTA file"); | |
| 40 | |
| 41 if ((fOut = fopen(av[2], "wt")) == NULL) | |
| 42 terror("opening OUT sequence Words file"); | |
| 43 | |
| 44 if ((offset = (long *) malloc(sizeof(long)*NREADS)) == NULL) | |
| 45 terror("memory for offsets"); | |
| 46 | |
| 47 for(i=0;i<NREADS;i++){ | |
| 48 offset[i]=0; | |
| 49 } | |
| 50 | |
| 51 nR = 0; | |
| 52 c = fgetc(fIn); | |
| 53 while(!feof(fIn)){ | |
| 54 if(c == '>'){ | |
| 55 offset[nR++] = ftell(fIn)-1; | |
| 56 } | |
| 57 c = fgetc(fIn); | |
| 58 } | |
| 59 | |
| 60 for(i=nR-1; i>=0; i--){ | |
| 61 fseek(fIn, offset[i], SEEK_SET); | |
| 62 //READ and write header | |
| 63 fgets(seq, READINPUT, fIn); | |
| 64 | |
| 65 fprintf(fOut, "%s", seq); | |
| 66 //READ and write sequence | |
| 67 c = fgetc(fIn); | |
| 68 while(c != '>' && !feof(fIn)){ | |
| 69 if(isupper(c) || islower(c)){ | |
| 70 seq[seqLen++]=c; | |
| 71 } | |
| 72 c = fgetc(fIn); | |
| 73 } | |
| 74 for(j=seqLen-1; j >= 0; j--){ | |
| 75 switch(seq[j]){ | |
| 76 case 'A': | |
| 77 toW = 'T'; | |
| 78 break; | |
| 79 case 'C': | |
| 80 toW = 'G'; | |
| 81 break; | |
| 82 case 'G': | |
| 83 toW = 'C'; | |
| 84 break; | |
| 85 case 'T': | |
| 86 toW = 'A'; | |
| 87 break; | |
| 88 case 'U': | |
| 89 toW = 'A'; | |
| 90 break; | |
| 91 case 'a': | |
| 92 toW = 't'; | |
| 93 break; | |
| 94 case 'c': | |
| 95 toW = 'g'; | |
| 96 break; | |
| 97 case 'g': | |
| 98 toW = 'c'; | |
| 99 break; | |
| 100 case 't': | |
| 101 toW = 'a'; | |
| 102 break; | |
| 103 case 'u': | |
| 104 toW = 'a'; | |
| 105 break; | |
| 106 default: | |
| 107 toW = seq[j]; | |
| 108 break; | |
| 109 } | |
| 110 fwrite(&toW, sizeof(char), 1, fOut); | |
| 111 } | |
| 112 toW='\n'; | |
| 113 fwrite(&toW, sizeof(char), 1, fOut); | |
| 114 seqLen=0; | |
| 115 } | |
| 116 | |
| 117 fclose(fIn); | |
| 118 fclose(fOut); | |
| 119 | |
| 120 return 0; | |
| 121 } | |
| 122 | |
| 123 | 
