1
|
1 /*
|
|
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
|
|
3
|
|
4 This file is part of EggLib.
|
|
5
|
|
6 EggLib is free software: you can redistribute it and/or modify
|
|
7 it under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation, either version 3 of the License, or
|
|
9 (at your option) any later version.
|
|
10
|
|
11 EggLib is distributed in the hope that it will be useful,
|
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 GNU General Public License for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
|
|
18 */
|
|
19
|
|
20 #ifndef EGGLIB_STADEN_HPP
|
|
21 #define EGGLIB_STADEN_HPP
|
|
22
|
|
#include <istream>
#include <string>
#include <vector>

#include "Align.hpp"
|
|
26
|
|
27 namespace egglib {
|
|
28
|
|
29 /** \brief Parser of Staden output format
|
|
30 *
|
|
31 * \ingroup core
|
|
32 *
|
|
33 * The parser is available as a static method. It takes either a
|
|
34 * stream or a string containing data formatted by the program GAP4
|
|
35 * of the Staden package (command 'dump contig to file').
|
|
36 *
|
|
37 */
|
|
38 class Staden {
|
|
39
|
|
40 public:
|
|
41
|
|
42
|
|
43 /** \brief Parses a string
|
|
44 *
|
|
45 * \param string a string containing an alignment formatted
|
|
46 * by the program GAP4 of the Staden package.
|
|
47 *
|
|
48 * \param deleteConsensus if true, the sequence named
|
|
49 * "CONSENSUS" is deleted from the file (if it is present).
|
|
50 *
|
|
51 * \return An Align instance containing the data found in
|
|
52 * the Staden while, after recoding the character following
|
|
53 * the standard codes.
|
|
54 *
|
|
55 * This method opens a stream to the string and calls the
|
|
56 * overloaded method.
|
|
57 *
|
|
58 * The character replacement rules assume Staden default
|
|
59 * convention, as follows:
|
|
60 * - "-" codes for an unknown base and is replaced by "N".
|
|
61 * - "*" codes for an alignment gap and is replaced by "-".
|
|
62 * - A white space represents missing data and is replaced
|
|
63 * by "?".
|
|
64 *
|
|
65 */
|
|
66 static Align parse(const std::string& string, bool deleteConsensus=true);
|
|
67
|
|
68
|
|
69 /** \brief Parses an open stream
|
|
70 *
|
|
71 * \param stream the open containing an alignment formatted
|
|
72 * by the program GAP4 of the Staden package.
|
|
73 *
|
|
74 * \param deleteConsensus if true, the sequence named
|
|
75 * "CONSENSUS" is deleted from the file (if it is present).
|
|
76 *
|
|
77 * \return An Align instance containing the data found in
|
|
78 * the Staden while, after recoding the character following
|
|
79 * the standard codes.
|
|
80 *
|
|
81 * The character replacement rules assume Staden default
|
|
82 * convention, as follows:
|
|
83 * - "-" codes for an unknown base and is replaced by "N".
|
|
84 * - "*" codes for an alignment gap and is replaced by "-".
|
|
85 * - A white space represents missing data and is replaced
|
|
86 * by "?".
|
|
87 *
|
|
88 */
|
|
89 static Align parse(std::istream& stream, bool deleteConsensus=true);
|
|
90
|
|
91
|
|
92 private:
|
|
93
|
|
94 /// Not allowed to instantiate this class
|
|
95 Staden() { }
|
|
96
|
|
97 /// Not allowed to instantiate this class
|
|
98 Staden(const Staden& source) { }
|
|
99
|
|
100 /// Not allowed to instantiate this class
|
|
101 ~Staden() { }
|
|
102
|
|
103
|
|
104 /* Gets the start position of sequences
|
|
105 *
|
|
106 * The functions gives total number of characters before the start of sequences
|
|
107 * and reads through until the next backspace (ignores the first line).
|
|
108 */
|
|
109 static void getShift();
|
|
110
|
|
111 // Translates according to the Staden format
|
|
112 static char transforme(char);
|
|
113
|
|
114 // Imports one sequence
|
|
115 static bool readOneSequence();
|
|
116
|
|
117 // Imports and concatenates one sequence
|
|
118 static bool readAppendOneSequence();
|
|
119
|
|
120 // Replaces dots by the matching character from CONSENSUS
|
|
121 static void undot(bool delete_consensus=true);
|
|
122
|
|
123 // The number of characters before the start of sequences
|
|
124 static int shift;
|
|
125
|
|
126 // The dynamically filled container (will result in an aligment)
|
|
127 static Container container;
|
|
128
|
|
129 // The current position
|
|
130 static int currpos;
|
|
131
|
|
132 // The reading stream
|
|
133 static std::istream* stream;
|
|
134
|
|
135 // Stores unique 8 characters discriminating readings
|
|
136 static std::vector<std::string> ID;
|
|
137 };
|
|
138 }
|
|
139
|
|
140 #endif
|