Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp @ 3:345f88a8f483 draft
Uploaded
| author | dereeper |
|---|---|
| date | Fri, 10 Jul 2015 10:38:43 -0400 |
| parents | 420b57c3c185 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:feb40a9a8eae | 3:345f88a8f483 |
|---|---|
| 1 /* | |
| 2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol | |
| 3 | |
| 4 This file is part of the EggLib library. | |
| 5 | |
| 6 EggLib is free software: you can redistribute it and/or modify | |
| 7 it under the terms of the GNU General Public License as published by | |
| 8 the Free Software Foundation, either version 3 of the License, or | |
| 9 (at your option) any later version. | |
| 10 | |
| 11 EggLib is distributed in the hope that it will be useful, | |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 GNU General Public License for more details. | |
| 15 | |
| 16 You should have received a copy of the GNU General Public License | |
| 17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
| 18 */ | |
| 19 | |
| 20 #ifndef EGGLIB_FASTA_HPP | |
| 21 #define EGGLIB_FASTA_HPP | |
| 22 | |
| 23 #include <istream> | |
| 24 #include <iostream> | |
| 25 #include <string> | |
| 26 #include "Container.hpp" | |
| 27 | |
| 28 namespace egglib { | |
| 29 | |
| 30 /** \brief Fasta parser/formatter | |
| 31 * | |
| 32 * \ingroup core | |
| 33 * | |
| 34 * Reads a multifasta sequence file from a string, a stream or a file | |
| 35 * and returns a Container. See the description of the format below. | |
| 36 * Formats a fasta string from a sequence container object and places | |
| 37 * it in a string, a stream or a file. All methods are static and the | |
| 38 * class cannot be instantiated. The methods parsef and formatf will | |
| 39 * open the file for you while the others will read/write directly | |
| 40 * in a string. | |
| 41 * | |
| 42 * Specifications of the fasta format: | |
| 43 * | |
| 44 * - The number of sequences is not limited. | |
| 45 * | |
| 46 * - Each sequence is preceded by a header limited to a single | |
| 47 * line and starting by a ">" character. | |
| 48 * | |
| 49 * - The header length is not limited and all characters are | |
| 50 * allowed but white spaces and special characters are | |
| 51 * discouraged. | |
| 52 * | |
| 53 * - Group indices are specified by \@0, \@1, \@2... strings | |
| 54 * appearing at the end of the header string (just before the | |
| 55 * carriage return). Note that group labels are ignored by | |
| 56 * default. | |
| 57 * | |
| 58 * - Group indices are ignored unless specifically specified in a | |
| 59 * parser's options. | |
| 60 * | |
| 61 * - The sequence itself continues on following lines until the | |
| 62 * next ">" character or the end of the file. | |
| 63 * | |
| 64 * - White spaces, tab and carriage returns are allowed at any | |
| 65 * position. There is no limitation in length and different | |
| 66 * sequences can have different lengths. | |
| 67 * | |
| 68 * - Although the standard is lower case characters, Fasta | |
| 69 * assumes upper case characters: lower case characters are | |
| 70 * accepted but are converted to upper case characters. | |
| 71 * Information coded by change in case is lost. | |
| 72 * | |
| 73 */ | |
| 74 class Fasta { | |
| 75 | |
| 76 public: | |
| 77 | |
| 78 /** \brief Imports a fasta file | |
| 79 * | |
| 80 * Imports the content of the file as is. Calls the method | |
| 81 * parse(std::istream&, bool) by creating its own istream. | |
| 82 * | |
| 83 * \param fname the name of a fasta file. | |
| 84 * | |
| 85 * \param importGroupLabels if set to true, scan automatically | |
| 86 * for groups. The format is @ followed by an integer, placed | |
| 87 * at the end of the header string (sequences without labels | |
| 88 * will be treated as \@0). | |
| 89 * | |
| 90 * \return A Container object containing the sequences. | |
| 91 * | |
| 92 */ | |
| 93 static Container parsef(const char* fname, bool importGroupLabels=false); | |
| 94 | |
| 95 | |
| 96 /** \brief Imports a fasta file | |
| 97 * | |
| 98 * Imports the content of the file as is. Calls the method | |
| 99 * parse(std::istream&, bool) by creating its own istream. This | |
| 100 * method expects a reference to a Container to which the | |
| 101 * sequences will be appended. | |
| 102 * | |
| 103 * \param fname the name of a fasta file. | |
| 104 * | |
| 105 * \param container a Container instance, empty or not. | |
| 106 * | |
| 107 * \param importGroupLabels if set to true, scan automatically | |
| 108 * for groups. The format is @ followed by an integer, placed | |
| 109 * at the end of the header string (sequences without labels | |
| 110 * will be treated as \@0). | |
| 111 * | |
| 112 * \return Nothing: the new sequences are appended to the | |
| 113 * Container passed as argument. | |
| 114 * | |
| 115 */ | |
| 116 static void parsef(const char* fname, Container& container, bool importGroupLabels=false); | |
| 117 | |
| 118 | |
| 119 /** \brief Imports fasta data from a string | |
| 120 * | |
| 121 * Imports the content of the string as is. Calls the method | |
| 122 * parse(std::istream&, bool) by creating its own istream. | |
| 123 * | |
| 124 * \param str a string containing the data. | |
| 125 * | |
| 126 * \param importGroupLabels if set to true, scan automatically | |
| 127 * for groups. The format is @ followed by an integer, placed | |
| 128 * at the end of the header string (sequences without labels | |
| 129 * will be treated as \@0). | |
| 130 * | |
| 131 * \return A Container object containing the sequences. | |
| 132 * | |
| 133 */ | |
| 134 static Container parse(const std::string& str, bool importGroupLabels=false); | |
| 135 | |
| 136 | |
| 137 /** \brief Imports fasta data from a string | |
| 138 * | |
| 139 * Imports the content of the string as is. Calls the method | |
| 140 * parse(std::istream&, bool) by creating its own istream. This | |
| 141 * method expects a reference to a Container to which the | |
| 142 * sequences will be appended. | |
| 143 * | |
| 144 * \param str a string containing the data. | |
| 145 * | |
| 146 * \param container a Container instance, empty or not. | |
| 147 * | |
| 148 * \param importGroupLabels if set to true, scan automatically | |
| 149 * for groups. The format is @ followed by an integer, placed | |
| 150 * at the end of the header string (sequences without labels | |
| 151 * will be treated as \@0). | |
| 152 * | |
| 153 * \return Nothing: new sequences are appended to the Container | |
| 154 * passed as argument. | |
| 155 * | |
| 156 */ | |
| 157 static void parse(const std::string& str, Container& container, bool importGroupLabels=false); | |
| 158 | |
| 159 | |
| 160 /** \brief Imports a fasta file from an open stream | |
| 161 * | |
| 162 * Imports the content of the file as is. | |
| 163 * | |
| 164 * \param stream an open stream (file or string) containing the | |
| 165 * data. | |
| 166 * | |
| 167 * \param importGroupLabels if set to true, scan automatically | |
| 168 * for groups. The format is @ followed by an integer, placed | |
| 169 * at the end of the header string (sequences without labels | |
| 170 * will be treated as \@0). | |
| 171 * | |
| 172 * \return A Container object containing the sequences. | |
| 173 * | |
| 174 */ | |
| 175 static Container parse(std::istream& stream, bool importGroupLabels=false); | |
| 176 | |
| 177 | |
| 178 /** \brief Imports a fasta file from an open stream | |
| 179 * | |
| 180 * Imports the content of the file as is. This | |
| 181 * method expects a reference to a Container to which the | |
| 182 * sequences will be appended. | |
| 183 * | |
| 184 * \param stream an open stream (file or string) containing the | |
| 185 * data. | |
| 186 * | |
| 187 * \param container a Container instance, empty or not. | |
| 188 * | |
| 189 * \param importGroupLabels if set to true, scan automatically | |
| 190 * for groups. The format is @ followed by an integer, placed | |
| 191 * at the end of the header string (sequences without labels | |
| 192 * will be treated as \@0). | |
| 193 * | |
| 194 * \return Nothing: the new sequences are appended to the | |
| 195 * Container passed as argument. | |
| 196 * | |
| 197 */ | |
| 198 static void parse(std::istream& stream, Container& container, bool importGroupLabels=false); | |
| 199 | |
| 200 | |
| 201 /** \brief Export sequences as fasta | |
| 202 * | |
| 203 * \param fname the name of the file where to place the result. | |
| 204 * | |
| 205 * \param container Container object to export. | |
| 206 * | |
| 207 * \param exportGroupLabels if set to true, exports group | |
| 208 * indices as a \@x at the end of the sequence name, where x is | |
| 209 * the group index. Otherwise, this information is discarded. | |
| 210 * | |
| 211 * \param lineLength the number of characters to place on a | |
| 212 * single line. If zero, no newlines are inserted within | |
| 213 * sequences. | |
| 214 * | |
| 215 */ | |
| 216 static void formatf(const char* fname, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50); | |
| 217 | |
| 218 | |
| 219 /** \brief Export sequences as fasta | |
| 220 * | |
| 221 * \param file an open stream. | |
| 222 * | |
| 223 * \param container Container object to export. | |
| 224 * | |
| 225 * \param exportGroupLabels if set to true, exports group | |
| 226 * indices as a \@x at the end of the sequence name, where x is | |
| 227 * the group index. Otherwise, this information is discarded. | |
| 228 * | |
| 229 * \param lineLength the number of characters to place on a | |
| 230 * single line. If zero, no newlines are inserted within | |
| 231 * sequences. | |
| 232 * | |
| 233 */ | |
| 234 static void format(std::ostream& file, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50); | |
| 235 | |
| 236 | |
| 237 /** \brief Export sequences as fasta | |
| 238 * | |
| 239 * This method creates internally an ostringstream, calls the | |
| 240 * method format(std::ostream&, const Container&, bool, unsigned int) and returns the | |
| 241 * resulting string. | |
| 242 * | |
| 243 * \param container Container object to export. | |
| 244 * | |
| 245 * \param exportGroupLabels if set to true, exports group | |
| 246 * indices as a \@x at the end of the sequence name, where x is | |
| 247 * the group index. Otherwise, this information is discarded. | |
| 248 * | |
| 249 * \param lineLength the number of characters to place on a | |
| 250 * single line. If zero, no newlines are inserted within | |
| 251 * sequences. | |
| 252 * | |
| 253 * \return The formatted string. | |
| 254 * | |
| 255 */ | |
| 256 static std::string format(const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50); | |
| 257 | |
| 258 | |
| 259 | |
| 260 protected: | |
| 261 | |
| 262 /// This class cannot be instantiated | |
| 263 Fasta() { } | |
| 264 | |
| 265 /// This class cannot be instantiated | |
| 266 Fasta(const Fasta& source) { } | |
| 267 | |
| 268 /// This class cannot be instantiated or copied | |
| 269 Fasta& operator=(const Fasta& source) { return *this; } | |
| 270 | |
| 271 /// This class cannot be instantiated | |
| 272 virtual ~Fasta() { } | |
| 273 | |
| 274 | |
| 275 }; | |
| 276 } | |
| 277 | |
| 278 #endif |
