Mercurial > repos > padge > trimal
comparison trimal_repo/source/readAl.cpp @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 17:10:43 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b15a3147e604 |
---|---|
1 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** | |
2 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** | |
3 | |
4 readAl v1.4: a tool for automated alignment conversion among different | |
5 formats. | |
6 | |
7 2009-2015 Capella-Gutierrez S. and Gabaldon, T. | |
8 [scapella, tgabaldon]@crg.es | |
9 | |
10 This file is part of readAl. | |
11 | |
12 readAl is free software: you can redistribute it and/or modify | |
13 it under the terms of the GNU General Public License as published by | |
14 the Free Software Foundation, the last available version. | |
15 | |
16 readAl is distributed in the hope that it will be useful, | |
17 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 GNU General Public License for more details. | |
20 | |
21 You should have received a copy of the GNU General Public License | |
22 along with readAl. If not, see <http://www.gnu.org/licenses/>. | |
23 | |
24 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** | |
25 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
26 | |
27 #include <stdlib.h> | |
28 #include <string.h> | |
29 | |
30 #include "alignment.h" | |
31 #include "defines.h" | |
32 #include "utils.h" | |
33 | |
34 void menu(void); | |
35 | |
36 int main(int argc, char *argv[]) { | |
37 | |
38 /* Input alignment */ | |
39 alignment inAlig; | |
40 | |
41 /* Local variables */ | |
42 string align_format; | |
43 int i, outformat = -1; | |
44 char *infile = NULL, *outfile = NULL; | |
45 bool errors = false, reverse = false, shortNames = false, format = false, \ | |
46 type = false, info = false; | |
47 | |
48 /* If there is no parameters: Inform about readAl options and finish */ | |
49 if(argc == 1) { | |
50 menu(); | |
51 return 0; | |
52 } | |
53 | |
54 i = 1; | |
55 /* If option -h has been used, inform about readAl options and finish */ | |
56 if(!strcmp(argv[i], "-h") && (i+1 == argc)) { | |
57 menu(); | |
58 return 0; | |
59 } | |
60 | |
61 /* Inform about current readAl version/revision/build and finish */ | |
62 if(!strcmp(argv[i], "--version") && (i+1 == argc)) { | |
63 cout << endl << "readAl v" << VERSION << ".rev" << REVISION << " build[" | |
64 << BUILD << "]" << endl << endl; | |
65 return 0; | |
66 } | |
67 | |
68 /* Catch different input options and then check whether there is a valid | |
69 * combination of parameters */ | |
70 while(i < argc) { | |
71 | |
72 /* Input alignment option: -in */ | |
73 if(!strcmp(argv[i], "-in") && (i+1 != argc) && (infile == NULL)) { | |
74 /* Allocate memory for storing input alignment filename */ | |
75 infile = new char[strlen(argv[++i]) + 1]; | |
76 strcpy(infile, argv[i]); | |
77 | |
78 /* Load input alignment and inform about it if something is wrong */ | |
79 if(!inAlig.loadAlignment(infile)) { | |
80 cerr << endl << "ERROR: Alignment not loaded: \"" << infile | |
81 << "\" Check the file's content." << endl << endl; | |
82 errors = true; | |
83 } | |
84 } | |
85 | |
86 /* Output filename option: -out */ | |
87 else if(!strcmp(argv[i], "-out") && (i+1 != argc) && (outfile == NULL)) { | |
88 /* Allocate memory for storing output alignment filename */ | |
89 outfile = new char[strlen(argv[++i]) + 1]; | |
90 strcpy(outfile, argv[i]); | |
91 } | |
92 | |
93 /* Get information about input file format */ | |
94 else if(!strcmp(argv[i], "-format") && (!format)) | |
95 format = true; | |
96 | |
97 /* Get information about input file residues type */ | |
98 else if(!strcmp(argv[i], "-type") && (!type)) | |
99 type = true; | |
100 | |
101 /* Get general information about input file: seqs number, average seq length, | |
102 * etc */ | |
103 else if(!strcmp(argv[i], "-info") && (!info)) | |
104 info = true; | |
105 | |
106 /* Get input sequences reverse option: -reverse */ | |
107 else if(!strcmp(argv[i], "-reverse") && (!reverse)) | |
108 reverse = true; | |
109 | |
110 /* For all output format options is checked if more | |
111 * than one output format has been required */ | |
112 | |
113 /* Set output alignment format to CLUSTAL: -clustal */ | |
114 else if(!strcmp(argv[i], "-clustal") && (outformat == -1)) | |
115 outformat = 1; | |
116 | |
117 /* Set output alignment format to FASTA: -fasta */ | |
118 else if(!strcmp(argv[i], "-fasta") && (outformat == -1)) | |
119 outformat = 8; | |
120 | |
121 /* Set output alignment format to FASTA and ask for using only | |
122 * up to 10 characters for sequences name: -fasta_m10 */ | |
123 else if(!strcmp(argv[i], "-fasta_m10") && (outformat == -1)) { | |
124 outformat = 8; | |
125 shortNames = true; | |
126 } | |
127 | |
128 /* Set output alignment format to NBRF/PIR: -nbrf */ | |
129 else if(!strcmp(argv[i], "-nbrf") && (outformat == -1)) | |
130 outformat = 3; | |
131 | |
132 /* Set output alignment format to NEXUS: -nexus */ | |
133 else if(!strcmp(argv[i], "-nexus") && (outformat == -1)) | |
134 outformat = 17; | |
135 | |
136 /* Set output alignment format to MEGA: -mega */ | |
137 else if(!strcmp(argv[i], "-mega") && (outformat == -1)) | |
138 outformat = 21; | |
139 | |
140 /* Set output alignment format to PHYLIP3.2 (sequential): -phylip3.2 */ | |
141 else if(!strcmp(argv[i], "-phylip3.2") && (outformat == -1)) | |
142 outformat = 11; | |
143 | |
144 /* Set output alignment format to PHYLIP3.2 (sequential) and ask for | |
145 * using only up to 10 characters for sequences name: -phylip3.2_m10 */ | |
146 else if(!strcmp(argv[i], "-phylip3.2_m10") && (outformat == -1)) { | |
147 outformat = 11; | |
148 shortNames = true; | |
149 } | |
150 | |
151 /* Set output alignment format to PHYLIP (interleaved): -phylip */ | |
152 else if(!strcmp(argv[i], "-phylip") && (outformat == -1)) | |
153 outformat = 12; | |
154 | |
155 /* Set output alignment format to PHYLIP (interleaved) and ask for | |
156 * using only up to 10 characters for sequences name: -phylip_m10 */ | |
157 else if(!strcmp(argv[i], "-phylip_m10") && (outformat == -1)) { | |
158 outformat = 12; shortNames = true; | |
159 } | |
160 | |
161 /* Set output alignment format to PHYLIP compatible with programs | |
162 * such as PAML: -phylip_paml */ | |
163 else if(!strcmp(argv[i], "-phylip_paml") && (outformat == -1)) | |
164 outformat = 13; | |
165 | |
166 /* Set output alignment format to PHYLIP compatible with programs such as | |
167 * PAML and ask for using only up to 10 characters for sequences name: | |
168 * -phylip_paml_m10 */ | |
169 else if(!strcmp(argv[i], "-phylip_paml_m10") && (outformat == -1)) { | |
170 outformat = 13; | |
171 shortNames = true; | |
172 } | |
173 | |
174 /* Set output alignment format to HTML, that means residues will be colored | |
175 * according to its physic-chemical properties using CLUSTAL color scheme: | |
176 * -html */ | |
177 else if(!strcmp(argv[i], "-html") && (outformat == -1)) | |
178 outformat = 100; | |
179 | |
180 /* Get unaligned sequences from input file: -onlyseqs */ | |
181 else if(!strcmp(argv[i], "-onlyseqs") && (outformat == -1)) | |
182 outformat = 99; | |
183 | |
184 /* Inform about no valid options */ | |
185 else { | |
186 cerr << endl << "ERROR: Parameter \"" << argv[i] << "\" not valid." | |
187 << endl << endl; | |
188 errors = true; | |
189 } | |
190 i++; | |
191 | |
192 /* If any error has been detected, break input options loop | |
193 * and then process detected error */ | |
194 if(errors) | |
195 break; | |
196 } | |
197 | |
198 /* Final verifications to detect any possible mistake in the input options */ | |
199 /* It is mandatory to provide an input file. Otherwise, inform about it */ | |
200 if((infile == NULL) && (!errors)) { | |
201 cerr << endl << "ERROR: An input file has to be defined." << endl << endl; | |
202 errors = true; | |
203 } | |
204 | |
205 /* It is mandatory to choose an option for processing input alignment */ | |
206 if((outformat == -1) && (!reverse) && (!format) && (!type) && (!info) | |
207 && (!errors)) { | |
208 cerr << endl << "ERROR: An option has to be chosen." << endl << endl; | |
209 errors = true; | |
210 } | |
211 | |
212 /* Only one option can be selected when an output file is not defined */ | |
213 if((outfile == NULL) && ((outformat != -1) || reverse) && (format || type \ | |
214 || info) && (!errors)) { | |
215 cerr << endl << "ERROR: Only one option can be selected: either an output " | |
216 << "format or get information about input file when an output file is " | |
217 << "not defined" << endl << endl; | |
218 errors = true; | |
219 } | |
220 | |
221 /* Does not make any sense to define any output file when | |
222 * only information about input alignment is requested */ | |
223 if(((outfile != NULL) && outformat == -1 && !reverse) && (format || type \ | |
224 || info) && (!errors)) { | |
225 cerr << endl << "ERROR: An output file should not be provided when only " | |
226 << "information about input alignment is requested" << endl << endl; | |
227 errors = true; | |
228 } | |
229 | |
230 /* If no error has been detected, process input file */ | |
231 if(!errors) { | |
232 | |
233 /* Print information about input alignment */ | |
234 if((format) || (type) || (info)) { | |
235 cout << "## Input filename\t'" << infile << "'" << endl; | |
236 | |
237 if(format) { | |
238 /* Input file format */ | |
239 if (inAlig.getInputFormat() == 1) | |
240 align_format = "clustal"; | |
241 else if (inAlig.getInputFormat() == 3) | |
242 align_format = "nbrf/pir"; | |
243 else if (inAlig.getInputFormat() == 8) | |
244 align_format = "fasta"; | |
245 else if (inAlig.getInputFormat() == 11) | |
246 align_format = "phylip3.2"; | |
247 else if (inAlig.getInputFormat() == 12) | |
248 align_format = "phylip"; | |
249 else if (inAlig.getInputFormat() == 17) | |
250 align_format = "nexus"; | |
251 else if (inAlig.getInputFormat() == 21) | |
252 align_format = "mega_interleaved"; | |
253 else if (inAlig.getInputFormat() == 22) | |
254 align_format = "mega_sequential"; | |
255 else | |
256 align_format = "unknown"; | |
257 | |
258 /* Inform about if sequences are aligned or not */ | |
259 cout << "## Input file format\t" << align_format << endl | |
260 << "## Input file aligned\t" << (inAlig.isFileAligned() ? "YES":"NO") | |
261 << endl; | |
262 } | |
263 | |
264 if(type) { | |
265 /* Inform about biological datatype */ | |
266 if (inAlig.getTypeAlignment() == DNAType) | |
267 cout << "## Input file datatype\tnucleotides:dna" << endl; | |
268 else if (inAlig.getTypeAlignment() == DNADeg) | |
269 cout << "## Input file datatype\tnucleotides:dna_degenerate_codes" | |
270 << endl; | |
271 else if (inAlig.getTypeAlignment() == RNAType) | |
272 cout << "## Input file datatype\tnucleotides:rna" << endl; | |
273 else if (inAlig.getTypeAlignment() == RNADeg) | |
274 cout << "## Input file datatype\tnucleotides:rna_degenerate_codes" | |
275 << endl; | |
276 else if (inAlig.getTypeAlignment() == AAType) | |
277 cout << "## Input file datatype\tamino-acids" << endl; | |
278 else | |
279 cout << "## Input file datatype\tunknown" << endl; | |
280 } | |
281 | |
282 if(info) | |
283 inAlig.printAlignmentInfo(cout); | |
284 } | |
285 | |
286 if((outfile != NULL) || (outformat != -1) || reverse || shortNames) { | |
287 /* Set output format */ | |
288 if(outformat != -1 || shortNames) | |
289 inAlig.setOutputFormat(outformat, shortNames); | |
290 /* Ask for getting the reverse of input file */ | |
291 if(reverse) | |
292 inAlig.setReverse(); | |
293 | |
294 /* If a outfile has been provided, try to generate output file */ | |
295 if(outfile != NULL) { | |
296 if(!inAlig.saveAlignment(outfile)) { | |
297 cerr << endl << "ERROR: Impossible to generate OUTPUT file." << endl | |
298 << endl; | |
299 return -1; | |
300 } | |
301 /* ... otherwise dump outfile content to standard output */ | |
302 } else { | |
303 inAlig.printAlignment(); | |
304 } | |
305 } | |
306 } | |
307 | |
308 /* Deallocate local memory */ | |
309 delete [] infile; | |
310 delete [] outfile; | |
311 | |
312 /* Inform about readAl execution */ | |
313 return (errors == true ? -1 : 0); | |
314 } | |
315 | |
316 void menu(void) { | |
317 | |
318 cout << endl | |
319 << "readAl v" << VERSION << ".rev" << REVISION << " build[" << BUILD | |
320 << "]. " << AUTHORS << endl << endl | |
321 | |
322 << "readAl webpage: http://trimal.cgenomics.org" << endl << endl | |
323 | |
324 << "This program is free software: you can redistribute it and/or modify " | |
325 << endl | |
326 << "it under the terms of the GNU General Public License as published by " | |
327 << endl | |
328 << "the Free Software Foundation, the last available version." << endl | |
329 << endl | |
330 | |
331 << "Basic usage" << endl | |
332 << "\treadal -in <inputfile> -out <outputfile> [options]." << endl << endl | |
333 | |
334 << "\t-h " << "Show this information." << endl | |
335 << "\t--version " << "Show readAl version." << endl << endl | |
336 | |
337 << "\t-in <inputfile> " << "Input file in several formats." << endl | |
338 << "\t-out <outputfile> " << "Output file name (default STDOUT)." << endl | |
339 << endl | |
340 | |
341 << "\t-format " << "Print information about input file format " | |
342 << "and if sequences are aligned or not." << endl | |
343 | |
344 << "\t-type " << "Print information about biological " | |
345 << "sequences datatype (e.g. nucleotides:dna, nucleotides:rna, aminoacids, etc)" | |
346 << endl | |
347 | |
348 << "\t-info " << "Print information about sequences number, " | |
349 << "average sequence length, max & min sequence length" | |
350 << endl << endl | |
351 | |
352 << "\t-onlyseqs " << "Generate output with only residues from " | |
353 << "input file" << endl << endl | |
354 | |
355 << "\t-html " << "Output residues colored according their " | |
356 << "physicochemical properties. HTML file." << endl << endl | |
357 | |
358 << "\t-reverse " << "Output the reverse of sequences in " | |
359 << "input file." << endl << endl | |
360 | |
361 << "\t-nbrf " << "Output file in NBRF/PIR format" << endl | |
362 << "\t-mega " << "Output file in MEGA format" << endl | |
363 | |
364 << "\t-nexus " << "Output file in NEXUS format" << endl | |
365 << "\t-clustal " << "Output file in CLUSTAL format" << endl | |
366 << endl | |
367 | |
368 << "\t-fasta " << "Output file in FASTA format" << endl | |
369 << "\t-fasta_m10 " << "Output file in FASTA format. Sequences " | |
370 << "name up to 10 characters." << endl << endl | |
371 | |
372 << "\t-phylip " << "Output file in PHYLIP/PHYLIP4 format" | |
373 << endl | |
374 << "\t-phylip_m10 " << "Output file in PHYLIP/PHYLIP4 format. " | |
375 << "Sequences name up to 10 characters." << endl | |
376 << "\t-phylip_paml " << "Output file in PHYLIP format compatible " | |
377 << "with PAML" << endl | |
378 << "\t-phylip_paml_m10 " << "Output file in PHYLIP format compatible " | |
379 << "with PAML. Sequences name up to 10 characters." << endl | |
380 << "\t-phylip3.2 " << "Output file in PHYLIP3.2 format" << endl | |
381 << "\t-phylip3.2_m10 " << "Output file in PHYLIP3.2 format. Sequences" | |
382 << " name up to 10 characters." << endl << endl; | |
383 } |