Mercurial > repos > padge > trimal
diff trimal_repo/source/readAl.cpp @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 17:10:43 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trimal_repo/source/readAl.cpp Fri Mar 25 17:10:43 2022 +0000 @@ -0,0 +1,383 @@ +/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** + ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** + + readAl v1.4: a tool for automated alignment conversion among different + formats. + + 2009-2015 Capella-Gutierrez S. and Gabaldon, T. + [scapella, tgabaldon]@crg.es + + This file is part of readAl. + + readAl is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, the last available version. + + readAl is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with readAl. If not, see <http://www.gnu.org/licenses/>. + +***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** +***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + +#include <stdlib.h> +#include <string.h> + +#include "alignment.h" +#include "defines.h" +#include "utils.h" + +void menu(void); + +int main(int argc, char *argv[]) { + + /* Input alignment */ + alignment inAlig; + + /* Local variables */ + string align_format; + int i, outformat = -1; + char *infile = NULL, *outfile = NULL; + bool errors = false, reverse = false, shortNames = false, format = false, \ + type = false, info = false; + + /* If there is no parameters: Inform about readAl options and finish */ + if(argc == 1) { + menu(); + return 0; + } + + i = 1; + /* If option -h has been used, inform about readAl options and finish */ + if(!strcmp(argv[i], "-h") && (i+1 == argc)) { + menu(); + return 0; + } + + /* Inform about current readAl version/revision/build and finish */ + if(!strcmp(argv[i], "--version") && (i+1 == argc)) { + cout << endl << "readAl v" << VERSION << ".rev" << REVISION << " build[" + << BUILD << "]" << endl << endl; + return 0; + } + + /* Catch different input options and then check whether there is a valid + * combination of parameters */ + while(i < argc) { + + /* Input alignment option: -in */ + if(!strcmp(argv[i], "-in") && (i+1 != argc) && (infile == NULL)) { + /* Allocate memory for storing input alignment filename */ + infile = new char[strlen(argv[++i]) + 1]; + strcpy(infile, argv[i]); + + /* Load input alignment and inform about it if something is wrong */ + if(!inAlig.loadAlignment(infile)) { + cerr << endl << "ERROR: Alignment not loaded: \"" << infile + << "\" Check the file's content." << endl << endl; + errors = true; + } + } + + /* Output filename option: -out */ + else if(!strcmp(argv[i], "-out") && (i+1 != argc) && (outfile == NULL)) { + /* Allocate memory for storing output alignment filename */ + outfile = new char[strlen(argv[++i]) + 1]; + strcpy(outfile, argv[i]); + } + + /* Get information about input file format */ + else if(!strcmp(argv[i], "-format") && (!format)) + format = true; + + /* Get information about input file residues type */ + else if(!strcmp(argv[i], "-type") && (!type)) + type = true; + + /* Get general information about input file: seqs number, average seq length, + * etc */ + else if(!strcmp(argv[i], "-info") && (!info)) + info = true; + + /* Get input sequences reverse option: -reverse */ + else if(!strcmp(argv[i], "-reverse") && (!reverse)) + reverse = true; + + /* For all output format options is checked if more + * than one output format has been required */ + + /* Set output alignment format to CLUSTAL: -clustal */ + else if(!strcmp(argv[i], "-clustal") && (outformat == -1)) + outformat = 1; + + /* Set output alignment format to FASTA: -fasta */ + else if(!strcmp(argv[i], "-fasta") && (outformat == -1)) + outformat = 8; + + /* Set output alignment format to FASTA and ask for using only + * up to 10 characters for sequences name: -fasta_m10 */ + else if(!strcmp(argv[i], "-fasta_m10") && (outformat == -1)) { + outformat = 8; + shortNames = true; + } + + /* Set output alignment format to NBRF/PIR: -nbrf */ + else if(!strcmp(argv[i], "-nbrf") && (outformat == -1)) + outformat = 3; + + /* Set output alignment format to NEXUS: -nexus */ + else if(!strcmp(argv[i], "-nexus") && (outformat == -1)) + outformat = 17; + + /* Set output alignment format to MEGA: -mega */ + else if(!strcmp(argv[i], "-mega") && (outformat == -1)) + outformat = 21; + + /* Set output alignment format to PHYLIP3.2 (sequential): -phylip3.2 */ + else if(!strcmp(argv[i], "-phylip3.2") && (outformat == -1)) + outformat = 11; + + /* Set output alignment format to PHYLIP3.2 (sequential) and ask for + * using only up to 10 characters for sequences name: -phylip3.2_m10 */ + else if(!strcmp(argv[i], "-phylip3.2_m10") && (outformat == -1)) { + outformat = 11; + shortNames = true; + } + + /* Set output alignment format to PHYLIP (interleaved): -phylip */ + else if(!strcmp(argv[i], "-phylip") && (outformat == -1)) + outformat = 12; + + /* Set output alignment format to PHYLIP (interleaved) and ask for + * using only up to 10 characters for sequences name: -phylip_m10 */ + else if(!strcmp(argv[i], "-phylip_m10") && (outformat == -1)) { + outformat = 12; shortNames = true; + } + + /* Set output alignment format to PHYLIP compatible with programs + * such as PAML: -phylip_paml */ + else if(!strcmp(argv[i], "-phylip_paml") && (outformat == -1)) + outformat = 13; + + /* Set output alignment format to PHYLIP compatible with programs such as + * PAML and ask for using only up to 10 characters for sequences name: + * -phylip_paml_m10 */ + else if(!strcmp(argv[i], "-phylip_paml_m10") && (outformat == -1)) { + outformat = 13; + shortNames = true; + } + + /* Set output alignment format to HTML, that means residues will be colored + * according to its physic-chemical properties using CLUSTAL color scheme: + * -html */ + else if(!strcmp(argv[i], "-html") && (outformat == -1)) + outformat = 100; + + /* Get unaligned sequences from input file: -onlyseqs */ + else if(!strcmp(argv[i], "-onlyseqs") && (outformat == -1)) + outformat = 99; + + /* Inform about no valid options */ + else { + cerr << endl << "ERROR: Parameter \"" << argv[i] << "\" not valid." + << endl << endl; + errors = true; + } + i++; + + /* If any error has been detected, break input options loop + * and then process detected error */ + if(errors) + break; + } + + /* Final verifications to detect any possible mistake in the input options */ + /* It is mandatory to provide an input file. Otherwise, inform about it */ + if((infile == NULL) && (!errors)) { + cerr << endl << "ERROR: An input file has to be defined." << endl << endl; + errors = true; + } + + /* It is mandatory to choose an option for processing input alignment */ + if((outformat == -1) && (!reverse) && (!format) && (!type) && (!info) + && (!errors)) { + cerr << endl << "ERROR: An option has to be chosen." << endl << endl; + errors = true; + } + + /* Only one option can be selected when an output file is not defined */ + if((outfile == NULL) && ((outformat != -1) || reverse) && (format || type \ + || info) && (!errors)) { + cerr << endl << "ERROR: Only one option can be selected: either an output " + << "format or get information about input file when an output file is " + << "not defined" << endl << endl; + errors = true; + } + + /* Does not make any sense to define any output file when + * only information about input alignment is requested */ + if(((outfile != NULL) && outformat == -1 && !reverse) && (format || type \ + || info) && (!errors)) { + cerr << endl << "ERROR: An output file should not be provided when only " + << "information about input alignment is requested" << endl << endl; + errors = true; + } + + /* If no error has been detected, process input file */ + if(!errors) { + + /* Print information about input alignment */ + if((format) || (type) || (info)) { + cout << "## Input filename\t'" << infile << "'" << endl; + + if(format) { + /* Input file format */ + if (inAlig.getInputFormat() == 1) + align_format = "clustal"; + else if (inAlig.getInputFormat() == 3) + align_format = "nbrf/pir"; + else if (inAlig.getInputFormat() == 8) + align_format = "fasta"; + else if (inAlig.getInputFormat() == 11) + align_format = "phylip3.2"; + else if (inAlig.getInputFormat() == 12) + align_format = "phylip"; + else if (inAlig.getInputFormat() == 17) + align_format = "nexus"; + else if (inAlig.getInputFormat() == 21) + align_format = "mega_interleaved"; + else if (inAlig.getInputFormat() == 22) + align_format = "mega_sequential"; + else + align_format = "unknown"; + + /* Inform about if sequences are aligned or not */ + cout << "## Input file format\t" << align_format << endl + << "## Input file aligned\t" << (inAlig.isFileAligned() ? "YES":"NO") + << endl; + } + + if(type) { + /* Inform about biological datatype */ + if (inAlig.getTypeAlignment() == DNAType) + cout << "## Input file datatype\tnucleotides:dna" << endl; + else if (inAlig.getTypeAlignment() == DNADeg) + cout << "## Input file datatype\tnucleotides:dna_degenerate_codes" + << endl; + else if (inAlig.getTypeAlignment() == RNAType) + cout << "## Input file datatype\tnucleotides:rna" << endl; + else if (inAlig.getTypeAlignment() == RNADeg) + cout << "## Input file datatype\tnucleotides:rna_degenerate_codes" + << endl; + else if (inAlig.getTypeAlignment() == AAType) + cout << "## Input file datatype\tamino-acids" << endl; + else + cout << "## Input file datatype\tunknown" << endl; + } + + if(info) + inAlig.printAlignmentInfo(cout); + } + + if((outfile != NULL) || (outformat != -1) || reverse || shortNames) { + /* Set output format */ + if(outformat != -1 || shortNames) + inAlig.setOutputFormat(outformat, shortNames); + /* Ask for getting the reverse of input file */ + if(reverse) + inAlig.setReverse(); + + /* If a outfile has been provided, try to generate output file */ + if(outfile != NULL) { + if(!inAlig.saveAlignment(outfile)) { + cerr << endl << "ERROR: Impossible to generate OUTPUT file." << endl + << endl; + return -1; + } + /* ... otherwise dump outfile content to standard output */ + } else { + inAlig.printAlignment(); + } + } + } + + /* Deallocate local memory */ + delete [] infile; + delete [] outfile; + + /* Inform about readAl execution */ + return (errors == true ? -1 : 0); +} + +void menu(void) { + + cout << endl + << "readAl v" << VERSION << ".rev" << REVISION << " build[" << BUILD + << "]. " << AUTHORS << endl << endl + + << "readAl webpage: http://trimal.cgenomics.org" << endl << endl + + << "This program is free software: you can redistribute it and/or modify " + << endl + << "it under the terms of the GNU General Public License as published by " + << endl + << "the Free Software Foundation, the last available version." << endl + << endl + + << "Basic usage" << endl + << "\treadal -in <inputfile> -out <outputfile> [options]." << endl << endl + + << "\t-h " << "Show this information." << endl + << "\t--version " << "Show readAl version." << endl << endl + + << "\t-in <inputfile> " << "Input file in several formats." << endl + << "\t-out <outputfile> " << "Output file name (default STDOUT)." << endl + << endl + + << "\t-format " << "Print information about input file format " + << "and if sequences are aligned or not." << endl + + << "\t-type " << "Print information about biological " + << "sequences datatype (e.g. nucleotides:dna, nucleotides:rna, aminoacids, etc)" + << endl + + << "\t-info " << "Print information about sequences number, " + << "average sequence length, max & min sequence length" + << endl << endl + + << "\t-onlyseqs " << "Generate output with only residues from " + << "input file" << endl << endl + + << "\t-html " << "Output residues colored according their " + << "physicochemical properties. HTML file." << endl << endl + + << "\t-reverse " << "Output the reverse of sequences in " + << "input file." << endl << endl + + << "\t-nbrf " << "Output file in NBRF/PIR format" << endl + << "\t-mega " << "Output file in MEGA format" << endl + + << "\t-nexus " << "Output file in NEXUS format" << endl + << "\t-clustal " << "Output file in CLUSTAL format" << endl + << endl + + << "\t-fasta " << "Output file in FASTA format" << endl + << "\t-fasta_m10 " << "Output file in FASTA format. Sequences " + << "name up to 10 characters." << endl << endl + + << "\t-phylip " << "Output file in PHYLIP/PHYLIP4 format" + << endl + << "\t-phylip_m10 " << "Output file in PHYLIP/PHYLIP4 format. " + << "Sequences name up to 10 characters." << endl + << "\t-phylip_paml " << "Output file in PHYLIP format compatible " + << "with PAML" << endl + << "\t-phylip_paml_m10 " << "Output file in PHYLIP format compatible " + << "with PAML. Sequences name up to 10 characters." << endl + << "\t-phylip3.2 " << "Output file in PHYLIP3.2 format" << endl + << "\t-phylip3.2_m10 " << "Output file in PHYLIP3.2 format. Sequences" + << " name up to 10 characters." << endl << endl; +}