comparison trimal_repo/source/readAl.cpp @ 0:b15a3147e604 draft

"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 17:10:43 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b15a3147e604
1 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
2 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
3
4 readAl v1.4: a tool for automated alignment conversion among different
5 formats.
6
7 2009-2015 Capella-Gutierrez S. and Gabaldon, T.
8 [scapella, tgabaldon]@crg.es
9
10 This file is part of readAl.
11
12 readAl is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, the last available version.
15
16 readAl is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with readAl. If not, see <http://www.gnu.org/licenses/>.
23
24 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
25 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
26
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "alignment.h"
31 #include "defines.h"
32 #include "utils.h"
33
34 void menu(void);
35
36 int main(int argc, char *argv[]) {
37
38 /* Input alignment */
39 alignment inAlig;
40
41 /* Local variables */
42 string align_format;
43 int i, outformat = -1;
44 char *infile = NULL, *outfile = NULL;
45 bool errors = false, reverse = false, shortNames = false, format = false, \
46 type = false, info = false;
47
48 /* If there is no parameters: Inform about readAl options and finish */
49 if(argc == 1) {
50 menu();
51 return 0;
52 }
53
54 i = 1;
55 /* If option -h has been used, inform about readAl options and finish */
56 if(!strcmp(argv[i], "-h") && (i+1 == argc)) {
57 menu();
58 return 0;
59 }
60
61 /* Inform about current readAl version/revision/build and finish */
62 if(!strcmp(argv[i], "--version") && (i+1 == argc)) {
63 cout << endl << "readAl v" << VERSION << ".rev" << REVISION << " build["
64 << BUILD << "]" << endl << endl;
65 return 0;
66 }
67
68 /* Catch different input options and then check whether there is a valid
69 * combination of parameters */
70 while(i < argc) {
71
72 /* Input alignment option: -in */
73 if(!strcmp(argv[i], "-in") && (i+1 != argc) && (infile == NULL)) {
74 /* Allocate memory for storing input alignment filename */
75 infile = new char[strlen(argv[++i]) + 1];
76 strcpy(infile, argv[i]);
77
78 /* Load input alignment and inform about it if something is wrong */
79 if(!inAlig.loadAlignment(infile)) {
80 cerr << endl << "ERROR: Alignment not loaded: \"" << infile
81 << "\" Check the file's content." << endl << endl;
82 errors = true;
83 }
84 }
85
86 /* Output filename option: -out */
87 else if(!strcmp(argv[i], "-out") && (i+1 != argc) && (outfile == NULL)) {
88 /* Allocate memory for storing output alignment filename */
89 outfile = new char[strlen(argv[++i]) + 1];
90 strcpy(outfile, argv[i]);
91 }
92
93 /* Get information about input file format */
94 else if(!strcmp(argv[i], "-format") && (!format))
95 format = true;
96
97 /* Get information about input file residues type */
98 else if(!strcmp(argv[i], "-type") && (!type))
99 type = true;
100
101 /* Get general information about input file: seqs number, average seq length,
102 * etc */
103 else if(!strcmp(argv[i], "-info") && (!info))
104 info = true;
105
106 /* Get input sequences reverse option: -reverse */
107 else if(!strcmp(argv[i], "-reverse") && (!reverse))
108 reverse = true;
109
110 /* For all output format options is checked if more
111 * than one output format has been required */
112
113 /* Set output alignment format to CLUSTAL: -clustal */
114 else if(!strcmp(argv[i], "-clustal") && (outformat == -1))
115 outformat = 1;
116
117 /* Set output alignment format to FASTA: -fasta */
118 else if(!strcmp(argv[i], "-fasta") && (outformat == -1))
119 outformat = 8;
120
121 /* Set output alignment format to FASTA and ask for using only
122 * up to 10 characters for sequences name: -fasta_m10 */
123 else if(!strcmp(argv[i], "-fasta_m10") && (outformat == -1)) {
124 outformat = 8;
125 shortNames = true;
126 }
127
128 /* Set output alignment format to NBRF/PIR: -nbrf */
129 else if(!strcmp(argv[i], "-nbrf") && (outformat == -1))
130 outformat = 3;
131
132 /* Set output alignment format to NEXUS: -nexus */
133 else if(!strcmp(argv[i], "-nexus") && (outformat == -1))
134 outformat = 17;
135
136 /* Set output alignment format to MEGA: -mega */
137 else if(!strcmp(argv[i], "-mega") && (outformat == -1))
138 outformat = 21;
139
140 /* Set output alignment format to PHYLIP3.2 (sequential): -phylip3.2 */
141 else if(!strcmp(argv[i], "-phylip3.2") && (outformat == -1))
142 outformat = 11;
143
144 /* Set output alignment format to PHYLIP3.2 (sequential) and ask for
145 * using only up to 10 characters for sequences name: -phylip3.2_m10 */
146 else if(!strcmp(argv[i], "-phylip3.2_m10") && (outformat == -1)) {
147 outformat = 11;
148 shortNames = true;
149 }
150
151 /* Set output alignment format to PHYLIP (interleaved): -phylip */
152 else if(!strcmp(argv[i], "-phylip") && (outformat == -1))
153 outformat = 12;
154
155 /* Set output alignment format to PHYLIP (interleaved) and ask for
156 * using only up to 10 characters for sequences name: -phylip_m10 */
157 else if(!strcmp(argv[i], "-phylip_m10") && (outformat == -1)) {
158 outformat = 12; shortNames = true;
159 }
160
161 /* Set output alignment format to PHYLIP compatible with programs
162 * such as PAML: -phylip_paml */
163 else if(!strcmp(argv[i], "-phylip_paml") && (outformat == -1))
164 outformat = 13;
165
166 /* Set output alignment format to PHYLIP compatible with programs such as
167 * PAML and ask for using only up to 10 characters for sequences name:
168 * -phylip_paml_m10 */
169 else if(!strcmp(argv[i], "-phylip_paml_m10") && (outformat == -1)) {
170 outformat = 13;
171 shortNames = true;
172 }
173
174 /* Set output alignment format to HTML, that means residues will be colored
175 * according to its physic-chemical properties using CLUSTAL color scheme:
176 * -html */
177 else if(!strcmp(argv[i], "-html") && (outformat == -1))
178 outformat = 100;
179
180 /* Get unaligned sequences from input file: -onlyseqs */
181 else if(!strcmp(argv[i], "-onlyseqs") && (outformat == -1))
182 outformat = 99;
183
184 /* Inform about no valid options */
185 else {
186 cerr << endl << "ERROR: Parameter \"" << argv[i] << "\" not valid."
187 << endl << endl;
188 errors = true;
189 }
190 i++;
191
192 /* If any error has been detected, break input options loop
193 * and then process detected error */
194 if(errors)
195 break;
196 }
197
198 /* Final verifications to detect any possible mistake in the input options */
199 /* It is mandatory to provide an input file. Otherwise, inform about it */
200 if((infile == NULL) && (!errors)) {
201 cerr << endl << "ERROR: An input file has to be defined." << endl << endl;
202 errors = true;
203 }
204
205 /* It is mandatory to choose an option for processing input alignment */
206 if((outformat == -1) && (!reverse) && (!format) && (!type) && (!info)
207 && (!errors)) {
208 cerr << endl << "ERROR: An option has to be chosen." << endl << endl;
209 errors = true;
210 }
211
212 /* Only one option can be selected when an output file is not defined */
213 if((outfile == NULL) && ((outformat != -1) || reverse) && (format || type \
214 || info) && (!errors)) {
215 cerr << endl << "ERROR: Only one option can be selected: either an output "
216 << "format or get information about input file when an output file is "
217 << "not defined" << endl << endl;
218 errors = true;
219 }
220
221 /* Does not make any sense to define any output file when
222 * only information about input alignment is requested */
223 if(((outfile != NULL) && outformat == -1 && !reverse) && (format || type \
224 || info) && (!errors)) {
225 cerr << endl << "ERROR: An output file should not be provided when only "
226 << "information about input alignment is requested" << endl << endl;
227 errors = true;
228 }
229
230 /* If no error has been detected, process input file */
231 if(!errors) {
232
233 /* Print information about input alignment */
234 if((format) || (type) || (info)) {
235 cout << "## Input filename\t'" << infile << "'" << endl;
236
237 if(format) {
238 /* Input file format */
239 if (inAlig.getInputFormat() == 1)
240 align_format = "clustal";
241 else if (inAlig.getInputFormat() == 3)
242 align_format = "nbrf/pir";
243 else if (inAlig.getInputFormat() == 8)
244 align_format = "fasta";
245 else if (inAlig.getInputFormat() == 11)
246 align_format = "phylip3.2";
247 else if (inAlig.getInputFormat() == 12)
248 align_format = "phylip";
249 else if (inAlig.getInputFormat() == 17)
250 align_format = "nexus";
251 else if (inAlig.getInputFormat() == 21)
252 align_format = "mega_interleaved";
253 else if (inAlig.getInputFormat() == 22)
254 align_format = "mega_sequential";
255 else
256 align_format = "unknown";
257
258 /* Inform about if sequences are aligned or not */
259 cout << "## Input file format\t" << align_format << endl
260 << "## Input file aligned\t" << (inAlig.isFileAligned() ? "YES":"NO")
261 << endl;
262 }
263
264 if(type) {
265 /* Inform about biological datatype */
266 if (inAlig.getTypeAlignment() == DNAType)
267 cout << "## Input file datatype\tnucleotides:dna" << endl;
268 else if (inAlig.getTypeAlignment() == DNADeg)
269 cout << "## Input file datatype\tnucleotides:dna_degenerate_codes"
270 << endl;
271 else if (inAlig.getTypeAlignment() == RNAType)
272 cout << "## Input file datatype\tnucleotides:rna" << endl;
273 else if (inAlig.getTypeAlignment() == RNADeg)
274 cout << "## Input file datatype\tnucleotides:rna_degenerate_codes"
275 << endl;
276 else if (inAlig.getTypeAlignment() == AAType)
277 cout << "## Input file datatype\tamino-acids" << endl;
278 else
279 cout << "## Input file datatype\tunknown" << endl;
280 }
281
282 if(info)
283 inAlig.printAlignmentInfo(cout);
284 }
285
286 if((outfile != NULL) || (outformat != -1) || reverse || shortNames) {
287 /* Set output format */
288 if(outformat != -1 || shortNames)
289 inAlig.setOutputFormat(outformat, shortNames);
290 /* Ask for getting the reverse of input file */
291 if(reverse)
292 inAlig.setReverse();
293
294 /* If a outfile has been provided, try to generate output file */
295 if(outfile != NULL) {
296 if(!inAlig.saveAlignment(outfile)) {
297 cerr << endl << "ERROR: Impossible to generate OUTPUT file." << endl
298 << endl;
299 return -1;
300 }
301 /* ... otherwise dump outfile content to standard output */
302 } else {
303 inAlig.printAlignment();
304 }
305 }
306 }
307
308 /* Deallocate local memory */
309 delete [] infile;
310 delete [] outfile;
311
312 /* Inform about readAl execution */
313 return (errors == true ? -1 : 0);
314 }
315
316 void menu(void) {
317
318 cout << endl
319 << "readAl v" << VERSION << ".rev" << REVISION << " build[" << BUILD
320 << "]. " << AUTHORS << endl << endl
321
322 << "readAl webpage: http://trimal.cgenomics.org" << endl << endl
323
324 << "This program is free software: you can redistribute it and/or modify "
325 << endl
326 << "it under the terms of the GNU General Public License as published by "
327 << endl
328 << "the Free Software Foundation, the last available version." << endl
329 << endl
330
331 << "Basic usage" << endl
332 << "\treadal -in <inputfile> -out <outputfile> [options]." << endl << endl
333
334 << "\t-h " << "Show this information." << endl
335 << "\t--version " << "Show readAl version." << endl << endl
336
337 << "\t-in <inputfile> " << "Input file in several formats." << endl
338 << "\t-out <outputfile> " << "Output file name (default STDOUT)." << endl
339 << endl
340
341 << "\t-format " << "Print information about input file format "
342 << "and if sequences are aligned or not." << endl
343
344 << "\t-type " << "Print information about biological "
345 << "sequences datatype (e.g. nucleotides:dna, nucleotides:rna, aminoacids, etc)"
346 << endl
347
348 << "\t-info " << "Print information about sequences number, "
349 << "average sequence length, max & min sequence length"
350 << endl << endl
351
352 << "\t-onlyseqs " << "Generate output with only residues from "
353 << "input file" << endl << endl
354
355 << "\t-html " << "Output residues colored according their "
356 << "physicochemical properties. HTML file." << endl << endl
357
358 << "\t-reverse " << "Output the reverse of sequences in "
359 << "input file." << endl << endl
360
361 << "\t-nbrf " << "Output file in NBRF/PIR format" << endl
362 << "\t-mega " << "Output file in MEGA format" << endl
363
364 << "\t-nexus " << "Output file in NEXUS format" << endl
365 << "\t-clustal " << "Output file in CLUSTAL format" << endl
366 << endl
367
368 << "\t-fasta " << "Output file in FASTA format" << endl
369 << "\t-fasta_m10 " << "Output file in FASTA format. Sequences "
370 << "name up to 10 characters." << endl << endl
371
372 << "\t-phylip " << "Output file in PHYLIP/PHYLIP4 format"
373 << endl
374 << "\t-phylip_m10 " << "Output file in PHYLIP/PHYLIP4 format. "
375 << "Sequences name up to 10 characters." << endl
376 << "\t-phylip_paml " << "Output file in PHYLIP format compatible "
377 << "with PAML" << endl
378 << "\t-phylip_paml_m10 " << "Output file in PHYLIP format compatible "
379 << "with PAML. Sequences name up to 10 characters." << endl
380 << "\t-phylip3.2 " << "Output file in PHYLIP3.2 format" << endl
381 << "\t-phylip3.2_m10 " << "Output file in PHYLIP3.2 format. Sequences"
382 << " name up to 10 characters." << endl << endl;
383 }