| 0 | 1 /* reverseComplement.c | 
|  | 2    Program reading a FASTA file as input (one or multiple sequence) | 
|  | 3    and then returning the reverse complementary sequence in the output file. | 
|  | 4    It is important to note that for multiple sequence file the first input | 
|  | 5    sequence is the last one at the output file and the last one in the input | 
|  | 6    is the first one in the output. | 
|  | 7    oscart@uma.es | 
|  | 8  */ | 
|  | 9 | 
|  | 10 #include <stdio.h> | 
|  | 11 #include <stdlib.h> | 
|  | 12 #include <string.h> | 
|  | 13 #include <ctype.h> | 
|  | 14 #include <inttypes.h> | 
|  | 15 #include "commonFunctions.h" | 
|  | 16 | 
|  | 17 #define SEQSIZE 2000000000 | 
|  | 18 #define READINPUT 10000 | 
|  | 19 #define WSIZE 32 | 
|  | 20 #define NREADS 1000000 | 
|  | 21 | 
|  | 22 int main(int ac, char** av) { | 
|  | 23 	FILE *fIn, *fOut; | 
|  | 24 	int64_t i, j, nR, seqLen = 0; | 
|  | 25 	char *seq, c, toW; | 
|  | 26 	long *offset = NULL; | 
|  | 27 | 
|  | 28 	if (ac != 3) | 
|  | 29 		terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT"); | 
|  | 30 | 
|  | 31 	/** | 
|  | 32 	 * Allocating memory for the sequence | 
|  | 33 	 * Fixed amount of memory, change it to loadSeqDB? | 
|  | 34 	 */ | 
|  | 35 	if ((seq = (char*) malloc(SEQSIZE)) == NULL) | 
|  | 36 		terror("memory for Seq"); | 
|  | 37 | 
|  | 38 	if ((fIn = fopen(av[1], "rt")) == NULL) | 
|  | 39 		terror("opening IN sequence FASTA file"); | 
|  | 40 | 
|  | 41 	if ((fOut = fopen(av[2], "wt")) == NULL) | 
|  | 42 		terror("opening OUT sequence Words file"); | 
|  | 43 | 
|  | 44 	if ((offset = (long *) malloc(sizeof(long)*NREADS)) == NULL) | 
|  | 45 		terror("memory for offsets"); | 
|  | 46 | 
|  | 47 	for(i=0;i<NREADS;i++){ | 
|  | 48 		offset[i]=0; | 
|  | 49 	} | 
|  | 50 | 
|  | 51 	nR = 0; | 
|  | 52 	c = fgetc(fIn); | 
|  | 53 	while(!feof(fIn)){ | 
|  | 54 		if(c == '>'){ | 
|  | 55 			offset[nR++] = ftell(fIn)-1; | 
|  | 56 		} | 
|  | 57 		c = fgetc(fIn); | 
|  | 58 	} | 
|  | 59 | 
|  | 60 	for(i=nR-1; i>=0; i--){ | 
|  | 61 		fseek(fIn, offset[i], SEEK_SET); | 
|  | 62 		//READ and write header | 
|  | 63 		fgets(seq, READINPUT, fIn); | 
|  | 64 | 
|  | 65 		fprintf(fOut, "%s", seq); | 
|  | 66 		//READ and write sequence | 
|  | 67 		c = fgetc(fIn); | 
|  | 68 		while(c != '>' && !feof(fIn)){ | 
|  | 69 			if(isupper(c) || islower(c)){ | 
|  | 70 				seq[seqLen++]=c; | 
|  | 71 			} | 
|  | 72 			c = fgetc(fIn); | 
|  | 73 		} | 
|  | 74 		for(j=seqLen-1; j >= 0; j--){ | 
|  | 75 			switch(seq[j]){ | 
|  | 76 				case 'A': | 
|  | 77 					toW = 'T'; | 
|  | 78 					break; | 
|  | 79 				case 'C': | 
|  | 80 					toW = 'G'; | 
|  | 81 					break; | 
|  | 82 				case 'G': | 
|  | 83 					toW = 'C'; | 
|  | 84 					break; | 
|  | 85 				case 'T': | 
|  | 86 					toW = 'A'; | 
|  | 87 					break; | 
|  | 88 				case 'U': | 
|  | 89 					toW = 'A'; | 
|  | 90 					break; | 
|  | 91 				case 'a': | 
|  | 92 					toW = 't'; | 
|  | 93 					break; | 
|  | 94 				case 'c': | 
|  | 95 					toW = 'g'; | 
|  | 96 					break; | 
|  | 97 				case 'g': | 
|  | 98 					toW = 'c'; | 
|  | 99 					break; | 
|  | 100 				case 't': | 
|  | 101 					toW = 'a'; | 
|  | 102 					break; | 
|  | 103 				case 'u': | 
|  | 104 					toW = 'a'; | 
|  | 105 					break; | 
|  | 106 				default: | 
|  | 107 					toW = seq[j]; | 
|  | 108 					break; | 
|  | 109 			} | 
|  | 110 			fwrite(&toW, sizeof(char), 1, fOut); | 
|  | 111 		} | 
|  | 112 		toW='\n'; | 
|  | 113 		fwrite(&toW, sizeof(char), 1, fOut); | 
|  | 114 		seqLen=0; | 
|  | 115 	} | 
|  | 116 | 
|  | 117 	fclose(fIn); | 
|  | 118 	fclose(fOut); | 
|  | 119 | 
|  | 120 	return 0; | 
|  | 121 } | 
|  | 122 | 
|  | 123 |