Repository 'convert_extract_sequence_file'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/convert_extract_sequence_file

Changeset 0:01c2b74b3a21 (2016-04-26)
Next changeset 1:158642ce204f (2016-04-28)
Commit message:
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
added:
convert_extract_sequence_file.py
convert_extract_sequence_file.xml
test-data/extracted_quality_illumina_1_3_fastq_output.qual
test-data/extracted_sequences_illumina_1_3_fastq_output.fasta
test-data/information_lenght_fasta_output.txt
test-data/input_sequence_file.fasta
test-data/input_sequence_file.fastq
test-data/report_illumina_1_3_fastq_output.txt
test-data/report_length_fasta_output.txt
b
diff -r 000000000000 -r 01c2b74b3a21 convert_extract_sequence_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_extract_sequence_file.py Tue Apr 26 08:18:18 2016 -0400
[
b'@@ -0,0 +1,414 @@\n+#!/usr/bin/python\n+# -*- coding: utf-8 -*-\n+\n+import sys\n+import os\n+import argparse\n+import copy\n+import operator\n+\n+FASTA_FILE_LAST_POS = None\n+\n+#################\n+# Parse methods #\n+#################\n+def text_end_of_file(row):\n+    if row == \'\':\n+        return True\n+    else:\n+        return False\n+\n+def get_new_line(input_file, generate_error = True):\n+    row = input_file.readline()\n+    if text_end_of_file(row):\n+        if generate_error :\n+            string = os.path.basename(__file__) + \': \'\n+            string += \' unexpected end of file\'\n+            raise ValueError(string)\n+        else :\n+            return None\n+    else:\n+        return row[:-1]\n+\n+def next_fasta_record(input_file):\n+    global FASTA_FILE_LAST_POS\n+    if FASTA_FILE_LAST_POS != None:\n+        input_file.seek(FASTA_FILE_LAST_POS)\n+    else:\n+        FASTA_FILE_LAST_POS = input_file.tell()\n+\n+    id_line = get_new_line(input_file, generate_error = False)\n+    if id_line == None:\n+        return None\n+    split_line = id_line[1:].split(\' \')\n+    seq_id = split_line[0]\n+    description = \' \'.join(split_line[1:])\n+    new_line = get_new_line(input_file, generate_error = False)\n+    seq = \'\'\n+    while new_line != None:\n+        if new_line[0] != \'>\':        \n+            seq += new_line\n+            FASTA_FILE_LAST_POS = input_file.tell()\n+            new_line = get_new_line(input_file, generate_error = False)\n+        else:\n+            new_line = None\n+    return SeqRecord(seq_id, seq, description)\n+\n+def next_fastq_record(input_file):\n+    id_line = get_new_line(input_file, generate_error = False)\n+    if id_line == None:\n+        return None\n+    if id_line[0] != \'@\':\n+        string = os.path.basename(__file__) + \': \'\n+        string += \' issue in fastq file\'\n+        raise ValueError(string)\n+    split_line = id_line[1:].split(\' \')\n+    seq_id = split_line[0]\n+    description = \' \'.join(split_line[1:])\n+    seq = get_new_line(input_file)\n+    spacer = get_new_line(input_file)\n+    quals = get_new_line(input_file)\n+    return SeqRecord(seq_id, seq, description, quals)\n+\n+def next_record(input_file, file_format):\n+    if file_format == \'fasta\':\n+        return next_fasta_record(input_file)\n+    elif file_format == \'fastq\':\n+        return next_fastq_record(input_file)\n+    else:\n+        string = os.path.basename(__file__) + \': \'\n+        string += file_format + \' is not managed\'\n+        raise ValueError(string)\n+\n+def write_fasta_record(record, output_sequence_file):\n+    output_sequence_file.write(\'>\' + record.get_id() + \' \' + \n+        record.get_description() + \'\\n\')\n+    seq = record.get_sequence()\n+    split_seq = [seq[i:i+60] for i in xrange(0,len(seq),60)]\n+    for split in split_seq:\n+        output_sequence_file.write(split + \'\\n\')\n+\n+def format_qual_value(qual_score, sliding_value, authorized_range, qual_format):\n+    ascii_value = ord(qual_score)\n+    score = ascii_value-sliding_value\n+    if score < authorized_range[0] or score > authorized_range[1]:\n+        string = os.path.basename(__file__) + \': wrong score (\'\n+        string += str(score) + \') with quality format (\'\n+        string += qual_format\n+        raise ValueError(string)\n+    return score\n+\n+def format_qual_string(qual_string, qual_format):\n+    if qual_format == \'sanger\':\n+        return format_qual_value(qual_string, 33 ,[0,40], qual_format)\n+    elif qual_format == "solexa":\n+        return format_qual_value(qual_string, 64 ,[-5,40], qual_format)\n+    elif qual_format == "illumina_1_3":\n+        return format_qual_value(qual_string, 33 ,[0,40], qual_format)\n+    elif qual_format == "illumina_1_5":\n+        return format_qual_value(qual_string, 33 ,[3,40], qual_format)\n+    elif qual_format == "illumina_1_8":\n+        return format_qual_value(qual_string, 33 ,[0,41], qual_format)\n+    else:\n+        string = os.path.basename(__file__) + \': quality format (\'\n+        string += qual_format + \') is not managed\'\n+        raise ValueError(string) '..b'self.values = [value_format(value)]\n+        self.values.sort()\n+\n+    def get_raw_constraint_type(self):\n+        return self.raw_constraint_type\n+\n+    def get_type(self):\n+        return self.type\n+\n+    def get_values(self):\n+        return self.values\n+\n+    def test_constraint(self, similarity_info_value):\n+        to_conserve = True\n+        if self.raw_constraint_type == \'in\':\n+            to_conserve &= fast_test_element_in_list(similarity_info_value, \n+                self.values)\n+        elif self.raw_constraint_type == \'not_in\':\n+            to_conserve &= (not fast_test_element_in_list(similarity_info_value, \n+                self.values))\n+        else:\n+            to_conserve &= self.type(similarity_info_value, self.values[0])\n+        return to_conserve    \n+\n+################\n+# Misc methods #\n+################\n+def test_input_filepath(input_filepath, tool, file_format):\n+    if not os.path.exists(input_filepath):\n+        string = os.path.basename(__file__) + \': \'\n+        string += input_filepath + \' does not exist\'\n+        raise ValueError(string)\n+\n+def format_constraints(constraints):\n+    formatted_constraints = {}\n+    if constraints != None:\n+        for constr in constraints:\n+            split_constraint = constr.split(\': \')\n+            constrained_information = split_constraint[0]\n+            constraint = Constraint(split_constraint[1], split_constraint[2], \n+                constrained_information)\n+            formatted_constraints.setdefault(constrained_information,[]).append(\n+                constraint)\n+    return formatted_constraints\n+\n+def convert_extract_sequence_file(args):\n+    input_filepath = args.input\n+    file_format = args.format\n+    constraints = args.constraint\n+    formatted_constraints = format_constraints(constraints)\n+\n+    records = Records(input_filepath, file_format, formatted_constraints)\n+    records.save_conserved_records(args)\n+    \n+    report_filepath = args.report\n+    with open(report_filepath, \'w\') as report_file:\n+\n+        report_file.write(\'Information to extract:\\n\')\n+        if args.custom_extraction_type == \'True\':\n+            for info in args.to_extract[1:-1].split(\',\'):\n+                report_file.write(\'\\t\' + info + \'\\n\')\n+        else:\n+            report_file.write(\'\\tsequences\\n\')\n+\n+        if constraints != None:\n+            report_file.write(\'Constraints on extraction:\\n\')\n+            for constrained_info in formatted_constraints:\n+                report_file.write(\'\\tInfo to constraint: \' + constrained_info \n+                    + \'\\n\')\n+                for constraint in formatted_constraints[constrained_info]:\n+                    report_file.write(\'\\t\\tType of constraint: \' + \n+                        constraint.get_raw_constraint_type()\n+                        + \'\\n\')\n+                    report_file.write(\'\\t\\tValues:\\n\')\n+                    values = constraint.get_values()\n+                    for value in values:\n+                        report_file.write(\'\\t\\t\\t\' + str(value) + \'\\n\')\n+        report_file.write(\'Number of similarity records: \' + \n+            str(records.get_record_nb()) + \'\\n\')\n+        report_file.write(\'Number of extracted similarity records: \' +\n+            str(records.get_conserved_record_nb()) + \'\\n\')\n+\n+########\n+# Main #\n+########\n+if __name__ == "__main__":\n+    parser = argparse.ArgumentParser()\n+    parser.add_argument(\'--input\', required=True)\n+    parser.add_argument(\'--format\', required=True)\n+    parser.add_argument(\'--custom_extraction_type\', required=True)\n+    parser.add_argument(\'--to_extract\')\n+    parser.add_argument(\'--output_information\')\n+    parser.add_argument(\'--split\')\n+    parser.add_argument(\'--quality_format\')\n+    parser.add_argument(\'--output_sequence\')\n+    parser.add_argument(\'--output_quality\')\n+    parser.add_argument(\'--constraint\', action=\'append\')\n+    parser.add_argument(\'--report\', required=True)\n+    args = parser.parse_args()\n+\n+    convert_extract_sequence_file(args)\n'
b
diff -r 000000000000 -r 01c2b74b3a21 convert_extract_sequence_file.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_extract_sequence_file.xml Tue Apr 26 08:18:18 2016 -0400
[
b'@@ -0,0 +1,249 @@\n+<tool id="convert_extract_sequence_file" name="Convert/ Extract information" version="1.0.0">\n+\n+\t<description>from a sequence file, with possible constraints</description>\n+\n+    <macros>\n+        <xml name="extraction_option">\n+            <param name="to_extract" type="select" display="checkboxes" multiple="true" label="Information to extract" help="">\n+                <option value="id">Identifiant</option>\n+                <option value="length">Length</option>\n+                <validator type="no_options" message="Select at least one information to extract"/>\n+            </param>\n+        </xml>\n+\n+        <xml name="extraction_test">\n+            <param name=\'specific_extraction\' type="select" label="Extract specific information?" help="If no is selected, a sequence file is generated. If yes, a text file containing the wanted information is generated">\n+                <option value="True">Yes</option>\n+                <option value="False" selected="true">No</option>\n+            </param>\n+        </xml>\n+    </macros>\n+\n+\t<requirements>\n+  \t</requirements>\n+\n+    <stdio>\n+    </stdio>\n+\n+    <version_command>\n+    </version_command>\n+\n+  \t<!--<command>-->\n+    <command><![CDATA[\n+  \t\tpython $__tool_directory__/convert_extract_sequence_file.py \n+      \t\t--input $sequence_file_format.sequence_file\n+\n+            --custom_extraction_type $sequence_file_format.extraction.specific_extraction\n+\n+            #if $sequence_file_format.extraction.specific_extraction == "True":\n+                --to_extract "{$sequence_file_format.extraction.to_extract}"\n+                --output_information $information_file\n+            #else if $sequence_file_format.format=="fastq":\n+                --split $sequence_file_format.extraction.split.split_test\n+                #if $sequence_file_format.extraction.split.split_test :\n+                    --quality_format $sequence_file_format.extraction.split.quality_format\n+                    --output_sequence $fasta_sequence_file_from_fastq\n+                    --output_quality $quality_file\n+                #else:\n+                    --output_sequence $fastq_sequence_file\n+                #end if\n+            #else:\n+                --output_sequence $fasta_sequence_file\n+            #end if\n+                \n+    \t\t#if $constraints.constrained_extraction == "True" :\n+                #for $i, $constrain in enumerate( $constraints.constraint_definition )\n+                \t#set info_to_constrain=$constrain.constrained_information[\'info_to_constrain\']             \n+                    #if $info_to_constrain in ("id"):\n+                    \t--constraint "$info_to_constrain:\n+                    \t${constrain.constrained_information.constraint_type.type}:\n+                    \t${constrain.constrained_information.constraint_type.value}"\n+                    #else:\n+                    \t#for $j, $sub_constrain in enumerate( $constrain.constrained_information.constraint_definition )\n+                    \t\t--constraint "$info_to_constrain:\n+                    \t\t${sub_constrain.type}:\n+                    \t\t${sub_constrain.value}"\n+                        #end for\n+                    #end if\n+                #end for\n+            #end if\n+\n+            --report $report_filepath\n+            --format $sequence_file_format.format\n+        ]]>\n+  \t</command>\n+\n+  \t<inputs>\n+        <conditional name="sequence_file_format">\n+        \t<param name="format" type="select" display="radio" \n+                label="Format of the sequence file" help="">\n+\t            <option value="fasta">Fasta</option>\n+                <option value="fastq">FastQ</option>\n+\t        </param>\n+            <when value="fastq">\n+                <param name="sequence_file" type="data" format="fastq" \n+                    label="Sequence file" help=""/>\n+                <conditional name="extraction">\n+                    <expand macro="extraction_test"/> \n+\n+                    <when value="True">\n+              '..b'+                                </param>\n+                                <param name="value" type="integer" min="0" max="3000" value="100" label="Value" help=""/>\n+                            </repeat>\n+                        </when>  \n+                    </conditional>\n+                </repeat>\n+            </when>\n+            <when value="False" />\n+        </conditional> />\n+  \t</inputs>\n+\n+  \t<outputs>\n+        <data format="txt" name="information_file" \n+            label="${tool.name} on ${on_string}: Information">\n+            <filter>((sequence_file_format[\'extraction\'][\'specific_extraction\'] == "True" ))</filter>\n+        </data>\n+\n+        <data format="fasta" name="fasta_sequence_file"\n+            label="${tool.name} on ${on_string}: Extracted sequences" >\n+            <filter>((sequence_file_format[\'format\'] == \'fasta\' and not sequence_file_format[\'extraction\'][\'specific_extraction\']== "True" ))</filter>\n+        </data>\n+\n+        <data format="fastq" name="fastq_sequence_file"\n+            label="${tool.name} on ${on_string}: Extracted sequences">\n+            <filter>((sequence_file_format[\'format\'] == \'fastq\' and sequence_file_format[\'extraction\'][\'specific_extraction\'] == "False" and sequence_file_format[\'extraction\'][\'split\'][\'split_test\'] == "False" ))</filter>\n+        </data>\n+\n+        <data format="qual" name="quality_file" \n+            label="${tool.name} on ${on_string}: Extracted quality">\n+            <filter>((sequence_file_format[\'format\'] == \'fastq\' and sequence_file_format[\'extraction\'][\'specific_extraction\'] == "False" and sequence_file_format[\'extraction\'][\'split\'][\'split_test\'] == "True" ))</filter>\n+        </data>\n+\n+        <data format="fasta" name="fasta_sequence_file_from_fastq"\n+            label="${tool.name} on ${on_string}: Extracted sequences">\n+            <filter>((sequence_file_format[\'format\'] == \'fastq\' and sequence_file_format[\'extraction\'][\'specific_extraction\'] == "False" and sequence_file_format[\'extraction\'][\'split\'][\'split_test\'] == "True" ))</filter>\n+        </data>\n+\n+        <data format="txt" name="report_filepath" \n+            label="${tool.name} on ${on_string}: Report"/>\n+  \t</outputs>\n+\n+  \t<tests>\n+        <test>\n+            <param name="format" value="fasta"/>\n+            <param name="sequence_file" value="input_sequence_file.fasta"/>\n+            <param name="specific_extraction" value="True" />\n+            <param name="to_extract" value="length" />\n+            <param name="constrained_extraction" value="False" />\n+            <output name="information_file" file="information_lenght_fasta_output.txt"/>\n+            <output name="report_filepath" file="report_length_fasta_output.txt"/>\n+        </test>\n+        <test>\n+            <param name="format" value="fastq"/>\n+            <param name="sequence_file" value="input_sequence_file.fastq"/>\n+            <param name="specific_extraction" value="False" />\n+            <param name="split_test" value="True" />\n+            <param name="quality_format" value="illumina_1_3" />\n+            <param name="constrained_extraction" value="False" />\n+            <output name="quality_file" file="extracted_quality_illumina_1_3_fastq_output.qual"/>\n+            <output name="fasta_sequence_file_from_fastq" file="extracted_sequences_illumina_1_3_fastq_output.fasta"/>\n+            <output name="report_filepath" file="report_illumina_1_3_fastq_output.txt"/>\n+        </test>\n+  \t</tests>\n+\n+  \t<help><![CDATA[\n+\n+**What it does**\n+\n+This tool extracts information (sequences, id, length, ...) from sequence files or convert a FastQ file to Fasta file.\n+\n+Some constraints could be added to extraction/conversion. For example, only sequences with more than 30 bp could be extracted. Or, a sequences whose the identifiant is in a list. \n+\n+The input is a sequence file in fasta or fastq format. The tool generates different outputs given the chosen parameters.\n+]]>\n+  \t</help>\n+\n+    <citations>\n+    </citations>\n+</tool>\n+\n'
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/extracted_quality_illumina_1_3_fastq_output.qual
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extracted_quality_illumina_1_3_fastq_output.qual Tue Apr 26 08:18:18 2016 -0400
b
b'@@ -0,0 +1,174 @@\n+>HWI-M00234:263:000000000-ADM55:1:1101:21704:2233 1:N:0:ATCACG\n+33 34 34 34 34 38 38 36 35 23 35 37 38 34 25 25 10 31 34 31 24 31 11 11 36 38 37 32 37 31 34 34 22 31 31 31 37 38 32 37 27 31 37 38 38 36 38 35 22 25 27 21 36 35 35 36 34 34 37 38\n+38 34 37 38 38 34 27 21 21 11 37 27 37 37 34 38 37 34 19 33 37 37 30 20 30 27\n+>HWI-M00234:263:000000000-ADM55:1:1101:21334:2699 1:N:0:ATCACG\n+34 34 34 34 34 38 37 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 37 38 38 38 38 38 38 38 38 38 38 38 38 38 31 37 38 38 38 38 38 38 38 38 36 32 35 38 38 38\n+38 38 38 38 38 38 38 38 38 38 37 38 38 38 38 37 37 31 34 37 37 36 38 38 35 38 38 38 38 38 37 38 38 38 38 38 38 38 38 37 38 37 34 37 38 38 38 38 38 37 38 34 36 28 37 38 37 35 38 37\n+38 38 34 36 36 38 38 38 38 35 37 38 38 38 37 38 38 38 38 38 34\n+>HWI-M00234:263:000000000-ADM55:1:1101:13968:2885 1:N:0:ATCACG\n+27 34 34 23 34 36 37 35 38 38 37 35 37 27 11 21 31 37 22 37 34 38 37 11 25 37 37 34 36 35 31 31 31 11 26 26 11 27 34 37 37 37 32 11 21 34 31 37 34 31 11 27 36 10 23 22 37 37 34 37\n+34 38 37 37 23 11 27 27 37 38 22 10 31 37 22 37 36 37 37 38 38 38 34 37 36 34 37 11 28 27 33 28 36 37 24 24 37 31 24 27 37 32 37 37 38 35 37 30 28 36 25 28 19 25 11 24 11 11 34 37\n+24 32 19 11 29 23 29 34 10 29 18 33 37 38 35 26 31 11 22 29 11 28 28 29 21 30 28 24 11 11 31 10 31 19 27 28 21 24 24 34 24 11 31 10 31 27 10 22 17 26 31 20 26 15 9 24 25 30 23 17\n+33 37 23 20 28 31 17 28 34 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n+>HWI-M00234:263:000000000-ADM55:1:1101:11873:2948 1:N:0:ATCACG\n+34 34 34 34 34 34 35 26 33 21 34 11 26 27 36 32 37 36 34 37 31 37 38 38 38 38 32 36 34 36 38 31 37 31 37 38 38 38 37 32 32 37 24 37 38 38 34 37 29 37 22 27 37 34 37 37 27 21 37 36\n+37 37 37 34 35 34 37 38 38 38 36 34 27 37 11 27 32 27 36 37 34 31 37 24 33 37 38 36 27 24 35 37 37 38 36 36 38 38 31 37 37 38 37 37 38 37 23 37 35 36 37 34 27 37 11 24 19 25 36 37\n+24 11 34 28 36 38 38 38 34 34 37 34 34 36 29 33 10 29 36 24 37 34 38 38 21 10 31 34 34 29 36 37 34 23 35 29 34 36 37 11 36 28 31 28 36 30 34 11 21 28 32 37 38 29 29 24 30 35 34 38\n+34 29 37 38 29 38 38 34 35 32 20 9 26 10 28 32 37 37 9 32 26 14 30 33 8 12 8 7 26 21 19 25 8 29 18 27 36 27 18 8 8 22 27 36 18 28 26 29 37 35 37 30 36 27 27 19 8 18 26 36\n+37 31 37 37 32 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n+>HWI-M00234:263:000000000-ADM55:1:1101:7855:3066 1:N:0:ATCACG\n+34 34 34 34 34 38 38 38 38 38 38 38 38 37 38 38 38 34 33 34 37 38 38 38 38 38 37 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 11 34 36 38 38 38 38 38 38 38 38 38 38 38 38 38 38\n+31 37 31 37 38 38 38 38 38 38 38 38 31 31 36 38 37 38 38 38 37 38 37 38 34 38 36 38 38 38 38 38 38 38 38 38 38 34 37 38 38 38 38 36 38 36 36 36 22 37 37 38 37 37 38 35\n+>HWI-M00234:263:000000000-ADM55:1:1101:24817:3238 1:N:0:ATCACG\n+34 34 34 34 34 22 37 38 37 38 36 38 38 38 38 38 38 35 37 38 38 38 38 38 38 35 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 37 36 36 34 38 38 38 38 38 38 37 37 38\n+38 38 38\n+>HWI-M00234:263:000000000-ADM55:1:1101:17179:3527 1:N:0:ATCACG\n+34 34 34 34 34 38 27 37 34 34 38 38 37 38 38 37 38 38 38 38 38 38 38 38 37 38 38 38 38 38 38 35 36 38 30 27 36 34 38 34 37 36 38 27 36 38 38 38 37 11 34 37 38\n+>HWI-M00234:263:000000000-ADM55:1:1101:21254:3571 1:N:0:ATCACG\n+34 34 34 34 34 37 36 37 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 27 37 34 34 38 38 37 37 38 38 35 37 38 38 38 37 38 38 34 38 38 22 37 38\n+38 38 38 38 34 38 38 32 37 37 36 36 38 34 25 37 38 38 38 38 38 38 38 38 38 35 37 38 38 37 32 38 37 38 38 38 38 22 37 36 38 38 38 38 38 38 36 22 37 38 38 38 29 36 38 38 38 37 37 37\n+34 37 38 38 37 34 36 36 38 38 37 38 34 38 38 38 37 38 37 35 38 37 37 38 38 34 37 38 38'..b'32 30 37 37 37 37 37 37 37 37 32 33 33 37 24 20 33 31 32 37 37 37 37 37 37 37 37\n+>HWI-M00234:263:000000000-ADM55:1:1101:5068:6755 1:N:0:ATCACG\n+34 34 34 34 34 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38\n+38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 35 37 37 30 37 38 38 38 38 38 35 37 38 38 38 38 38 38 38 38 38 38 37 37 37 37 38 38 38 38 38 37 38 38 37 38 38 38 38 38 38 35\n+38 38 38 38 38 38 38 37 38 38 38 36 36 35 38 38 38 24 34 24 36 36 38 38 38 38 38 35 23 31 37 38 38 38 38 38 38 38 38 38 38 38 38 38 29 34 37 38 38 38 38 38 34 37 38 35 38 35 37 38\n+37 38 38 37 22 34 37 38 35 37 29 37 37 37 32 37 28 26 32 29 36 35 35 33 37 33 33 37 37 37 31 35 31 35 27 36 30 31 37 15 16 26 31 27 30 34 27 19 22 16 13 17 8 19 23 27 30 30 17 7\n+20 19 24 27 33 29 37 35 37 17 19 8 21 21 27 8 8\n+>HWI-M00234:263:000000000-ADM55:1:1101:21477:6796 1:N:0:ATCACG\n+34 34 11 32 34 25 37 23 27 37 31 34 34 36 35 38 38 34 36 38 38 38 38 38 37 32 38 27 37 38 38 23 31 37 36 37 36 37 37 34 36 38 38 34 37 37 27 27 27 37 31 27 34 10 31 37 37 25 32 37\n+34 36 37 36 11 36 34 37 24 32 37 38 34 31 35 36 27 37 36 11 11 20 31 24 33 23 33 37 37 34 36 23 37 11 28 37 37 31 34 38 22 37 34 37 38 35 25 37 37 37 22 36 32 37 35 38 24 37 26 37\n+37 37 31 25 37 36 36 38 31 19 28 31 35 36 37 38 21 28 36 38 34 32 35 34 21 31 36 34 34 32 11 31 35 35 37\n+>HWI-M00234:263:000000000-ADM55:1:1101:12483:6880 1:N:0:ATCACG\n+34 34 34 34 34 38 38 38 38 38 38 38 37 38 38 37 38 38 38 38 38 37 38 35 37 36 38 38 38 38 38 37 37 37 38 38 37 38 38 38 38 38 38 38 38 37 38 38 38 37 38 38 38 38 38 38 38 38 34 36\n+38 38 38 38 34 37 38 38 38 38 38 38 38 38 38 38 38 38 37 38 38 38 37 36 38 38 35 38 38 38 38 37 37 37 38 38 38 38 38 38 38 38 37 38 38 38 38 38 38 38 38 38 38 37 38 37 38 34 37 31\n+37 36 37 37 38 35 32 38 37 38 37 38 38 38 38 37 38 38 38 38 35 38 38 38 37 37 38 38 38 35 26 36 36 37 38 38 38 38 38 38 38 37 38 38 38 38 38 38 38 38 38 38 37 35 38 35 38 38 38 38\n+38 38 38 38 38 38 38 35 38 37 37 37 37 37 37 37 24 26 26 35 37 37 37 37 37 33 37 37 33 35 33 37 37 33 37 35 28 35 35 37 35 37 37 37 36 37 36 37 36 37 36 36 34 36 27 31 36 31 34 37\n+37 31 36 33 32 37 37 36 36 36 37 37\n+>HWI-M00234:263:000000000-ADM55:1:1101:27958:6935 1:N:0:ATCACG\n+34 34 34 34 34 38 38 38 38 38 38 38 38 38 38 38 35 38 38 34 37 38 38 38 38 38 38 38 37 36 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 23 35 34\n+27 37 38 38 38 37 37 38 38 38 38 38 38 38 38 38 38 34 38 37 25 37 35 38 38 38 38 38 38 31 37 36 36 36 38 38 38 36 38 35 38 38 38 37 38 38 38 38 38 38 25 37 37 38 38 28 36 38 37 37\n+37 34 36 37 38 24 36 36 37 38 35 10 31 31 36 36 35 36 35 23 35 32 29 36 36 34 31 36 37 38 37 32 37 37 38 37 28 37 30 10 19 19 26 34 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n+>HWI-M00234:263:000000000-ADM55:1:1101:6151:7404 1:N:0:ATCACG\n+34 34 34 34 34 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 37 38 37 38 38 38 38 38\n+38 38 38 38 38 37 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 35 37 23 37 38 38 38 37 38 38 38 38 36 38 38 38 22 10 34 38 38 37 38 38 38 38 38 38 38 38 34 25 23 37 38 38\n+38 35 37 38 37 36 38 38 38 37 38 38 38 37 38 34 38 38 38 38 38 38 38 38 35 38 30 36 38 38 38 38 38 37 37 11 27 37 38 38 38 38 37 35 38 38 38 38 11 20 11 36 37 38 38 38 35 10 10 9\n+20 25 35 38 37 34 35 35 30 21 29 32 37 37 37 19 30 21 8 20 30 36 33 30 35 15 29 35 37 37 37 32 37 15 7 14 13 21 27 35 15 18 26 18 7 8 13 21 19 21 27 32 8 8 13 8 8 7 19 25\n+15 27 33 37 33 35 30 37 27 17 27 32 37 37 27 33 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n'
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/extracted_sequences_illumina_1_3_fastq_output.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extracted_sequences_illumina_1_3_fastq_output.fasta Tue Apr 26 08:18:18 2016 -0400
b
b'@@ -0,0 +1,174 @@\n+>HWI-M00234:263:000000000-ADM55:1:1101:21704:2233 1:N:0:ATCACG\n+GTGTACAAGGCCCGGGAACGTATTCACCGCGGCATGCTGATCCGCGATTACTAGCGATTC\n+CAACTTCATACAGGCGGGTTTCAGCC\n+>HWI-M00234:263:000000000-ADM55:1:1101:21334:2699 1:N:0:ATCACG\n+CTTTTGACGGCCTTTTTGTTGTCTTCTGAATTTTGTTTTTTTCGGTTGTAACATAATTCC\n+TAAAAATCAAATTCGTTAGCGATTATTTTTCTTTCTTTTGAAGTTCTTTCCGCCATTGTT\n+TCCGCTATTGCTTCCACGACC\n+>HWI-M00234:263:000000000-ADM55:1:1101:13968:2885 1:N:0:ATCACG\n+CCCACTGCTGCCTCCCGTAGGAGCCTGGACCGTGTCTCAGTTCCAATGTGGCCGATCACC\n+CTCTCAGGTCGGCTACCCATCGTTGCCCTGGTAAGCCATTACCCTACCAACTAGCTAATG\n+GGACGCGGGCACATCTCAAAGCGGATTACCCCTTTGGTTAAAGTTTAATGCGAAACTCTA\n+ACTTTATGCGTTATTAACCCTCCTTTCGGAGGGCTATTCCCCTCTTTGATGCAGGTTGCC\n+CACGTGTTACTCACCCGTCCGCCGCTAATCCACTTCCCGAAGGCAGATTCAACCCTCGAT\n+>HWI-M00234:263:000000000-ADM55:1:1101:11873:2948 1:N:0:ATCACG\n+CCCCGTTACATCTTCCGCGCAGGAAGACTCGATCAGTGAGCTATTACGCTTTCTTTGAAG\n+GATGGCTGCTTCTAAGCCAACTTCCTGACTGTCTTAGCCTTCCCACTTCGTTTTCCACTT\n+AGTCAATATTAGGGACCTTAGCCGGCGGTCTGGGTTGTTTCCCTCTTGAGTCCGGACGTT\n+AGCACCCGGTGCTCTGTCCCCCGTGCCCAAACTTCCAAGTATTCGGAGTTTGCCATGGTT\n+TGGTAAGACGCTATGTCCCCCTAACCATAACAGTGTTCTACCCTCTGAACACATACTCGA\n+>HWI-M00234:263:000000000-ADM55:1:1101:7855:3066 1:N:0:ATCACG\n+CGTCAATCTTCCAACGCCCACGGCAGATAGGGACAAAACTGTCTCACGACGTTTTAAACC\n+CAGCTCACGTACCTCTTTAAATGGCGAACAGCCATACCCTTGGGACCGGCTACAGC\n+>HWI-M00234:263:000000000-ADM55:1:1101:24817:3238 1:N:0:ATCACG\n+CCCGTGACCAGATTCATGGCCGCCGGTTAGAACCCCAGTACTGTCAGGGTGGTATCCCAA\n+GGA\n+>HWI-M00234:263:000000000-ADM55:1:1101:17179:3527 1:N:0:ATCACG\n+GTGCTTATTTCACTTCCTCAAAATCAGCATCCTGCACATTATCGCCATGCTTT\n+>HWI-M00234:263:000000000-ADM55:1:1101:21254:3571 1:N:0:ATCACG\n+GTCGCGGGGAAGATCAACTCCTGCAATTCGAGCCATACCAATTACCTCCTAGCGCCCTGA\n+CGCTGTTTATGACGAGGATCGCGGCTGCAGATGACGCGAATGACGCCATTGCGCTTGATC\n+ACTCGGCAATACTCACAAATGGGTTTAACAGATTATCTAACCTTCATAGTTTGCACACTC\n+CTCATGGACATTCC\n+>HWI-M00234:263:000000000-ADM55:1:1101:16569:3709 1:N:0:ATCACG\n+CCCGTCAATTCCTTTGAGTTTCATTCTTGCGAACGTACTCCCCAGGTGGAATACTTATTG\n+CGTTTGCTGCGGCACCGAATGGCTTTGCCACCCGACACCTAGTATTCATCGTTTACGGCG\n+TGGACTACCAGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGAGCCTCAACGTCAGGA\n+TCGGAAGAGCACACGTCTGAACTCCAGTCACATCCCG\n+>HWI-M00234:263:000000000-ADM55:1:1101:22403:3828 1:N:0:ATCACG\n+CCCGGACATCTTCGGCGCAGGATCTCTTGACTAGTGAGCTATTACGCACTCTTTAAATGA\n+GTGGCTGCTTCTAAGCCAACATCCTAGTTGTCTTAGAAATCCCACATCCTTTTCCACTTA\n+ACTTACACTTTGGGACCTTAGCTGATGAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n+CATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAAATATCAATCAAACACAC\n+ACACACCAGCTTAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:7508:4067 1:N:0:ATCACG\n+GGTGCACTAGGATCGTAGTTGGCTACTTTCCCGTTTTCAATGTATACGCAAGGTACACGG\n+TCAGCGGTAGCTGCCATAATGTAATGGTAGTCAAATCCTAAGTCTCCCAATGCACATGGC\n+AGTGGAGCGTTCCAATCTTGTGTACCGGCTTTGTCTCCTAATCCCAAGTGCCATTTTCCG\n+ATAGCTCCGGTAGCATAACCGGTACTTTTGAACATAACCGCCATGGTATATTGTTCGGGA\n+CGGATAATCATTCCGGCATTTCCGGCAGCTACG\n+>HWI-M00234:263:000000000-ADM55:1:1101:26232:4161 1:N:0:ATCACG\n+CTCTTCTTAAGCTCGAACAGCTTCTTCAGTCTTACCTGCCATTTCATCTAAAATTTTTAA\n+AGCTCGCTCTAAAACAGTAGTATCATCAATCATAACCAATCCACCATCTGGTCCAGGTTC\n+CAAGTGAATCCCAACACTTTTCCCCTCCTTAAAATTGTGTCCACCGAAAAAATTTCTTAC\n+AGTGTCAACATTCAGTCCAAGTTCATCGGCTATTCTATGCATACTACCACTAGGCAATGA\n+GT\n+>HWI-M00234:263:000000000-ADM55:1:1101:23151:4189 1:N:0:ATCACG\n+GCGGATTTTCTCGGGAGTATGATTACCCACACTATTGGATTCTTCCGAAGAAGACTCCAT\n+ACTACCAAGTTCAGCTCGGATGGTGGATTTGCCTGCCAAGATCAGAAGAGCACACGTCTG\n+AACTCCAGTCACATCACGACTTCGTATGCCGTATTCTGCTTGAAAAAAACAATGAAAACT\n+AGTCACCGATGACTCACGTTCGCTGAACATTATCTGACAACTAGCTTAACAATGGACATG\n+CTTAGCACTGACAACCAGACG\n+>HWI-M00234:263:000000000-ADM55:1:1101:17148:4803 1:N:0:ATCACG\n+CCCTTTTCGGCAATGGCGATGTTACGGTACATAAGAGCGATTTCGGGGAAACCTTCTTCG\n+TCTGCAATATCAGCAAACTTAGGATAATCCAATGA\n+>HWI-M00234:263:000000000-ADM55:1:1101:8939:4808 1:N:0:ATCACG\n+CCACAGTTCTGTCATATCGCAGTACAGGAATCTCAACCTGTTGTCCATCGGCTACGGCTC\n+TCGCCCTCGCCTTAGGCCCCGACTTACCCAGGGCAGATCAGCTTTA\n+>HWI-M00234:263:000000000-ADM55:1:1101:11727:5103 1:N:0:ATCACG\n+CTCCCGTATAGGATGGATCGAAAGCAATCGTCAGGAATCCACGTTCTGCCAATGTCTGCG\n+CATACAAGCCCGAAGCTTGTTCCTTCACGGCACCAAATGGGCCACTGATGGCAATGGCAG\n+GCAATTTACCCGTCACATTCTTGGGTACAT'..b'0000-ADM55:1:1101:26932:5861 1:N:0:ATCACG\n+AGCGTCAGTTACAGTCCAGAAAGCCGCCTTCGCCACTGGTGTTCTTCCTAATCTCTACGC\n+ATTTCACCGCTACACTAGGAATTCCGCTTTCCTCTCCTGCACTCTAGATATCCAGTTTGG\n+AATGCAGCCCCCAGGTTAAGCCCGGGGATTTCACATCCCACTTAAACATCCGCCTACGCA\n+CCCTTTACCACCAGTAAATCCGGACAACGCTCGCCACCTACGTATTACCGCGGCTGCTGG\n+CACGTAGTTAGCCGTGGCTTCCTCCTCTGGTACCTTCATTATCGTCCCCGAAACC\n+>HWI-M00234:263:000000000-ADM55:1:1101:14154:5990 1:N:0:ATCACG\n+GTGGGATGTCAAGGTAAGCGGCGGCAGCCTTGGCTACAGCAAGAGATACGCCGAGGATGG\n+CATTGGCACCCAGATTGGCTTTCGTTTTGGTTCCGTCCAGTGCAAGCATGGCATGGTCGA\n+TACCCATCTGGTCGAGGGCGGACATACCGATGAGCTTGGGAGCAATGATATTATTGATAT\n+TGTCCACGGCTTTCAGGAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:25022:6229 1:N:0:ATCACG\n+CCCGAGAACGTATTCACCGCAACATTGCTGATTTGCGATTACTAGTGATTCCAACTTCAT\n+GTACTCGAGTTGCAGAGTACAATCCGAACTACGAACAGCTTTCTGAGGTTTGCTCCTCCT\n+CGCAGATTTGCTGATCGGAAGAGCACACGTCTGAACTCCAGTCACATC\n+>HWI-M00234:263:000000000-ADM55:1:1101:21023:6286 1:N:0:ATCACG\n+GTGTACAAGGCCCGGGAACGTATTCACCGCGCCGTGGCTGATGCGCGATTACTAGCGAAT\n+CCAGCTTCATGGAGTCGGGTTGCAGACTCCAATCCGAACTGAGAGAGGCTTTAGGGATTA\n+GCATCACGTCGCCGTGTAGCTGCCTTCTGTACCCCCCATTGTAACACGTGTGTAGCCCCG\n+GACGTAAGGGCCGTGCTGATTTGCCGTCATCCCCACCTTCCTCAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:8138:6336 1:N:0:ATCACG\n+TGTGTACAAGGCCCGGGAACGTATTCACCGCGACATTCTGATTCGCGATTACTAGCAACT\n+CCAGCTTCATGTAGGCGAGTTGCAGCCTACAATCCGAACTGAGATAGGTTTTATAAGTTT\n+TGCTCCACCTCACGGTCTT\n+>HWI-M00234:263:000000000-ADM55:1:1101:11055:6543 1:N:0:ATCACG\n+CCTGGACATTTTCGGCGCAGGACCTCCCGACCAGTGAGCTATTACGCACTCTTTGACTGT\n+GTGGCTGCTTCTGAGCCAACATCCTGGTTGACTTTGAAATCTCACATACTTTTCCACTTA\n+ACACGCACTTTGGGACCTTAGCTGGAGGTCTTGGCTCTTTCCTTTTTGCCTATCCAACTT\n+ATCTCGTATAGTCTGACTCCCGG\n+>HWI-M00234:263:000000000-ADM55:1:1101:24980:6575 1:N:0:ATCACG\n+GCCATTTATCTACGCCTGTTGTCACCAACAGGCTCTAGCGGTCTACCCTCCGACGTGGGG\n+CGAGCAACCCTCATA\n+>HWI-M00234:263:000000000-ADM55:1:1101:7596:6682 1:N:0:ATCACG\n+GCCCCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATACTTA\n+ACGCTTTCGCTTGGCGGCTGACAATGTATCGCCAACCGCGAGTATTCATCAGATCGGAAG\n+AGCACACGTCTGAACTCCAGTCACATCCCGATCTCGTATGCCGTCCTCGGCTTGAAAAAA\n+AAAAATATAAACCATGATGAATTTCGAACACTCACGACCACACACTAAGCGTCGACGTA\n+>HWI-M00234:263:000000000-ADM55:1:1101:24419:6736 1:N:0:ATCACG\n+CGGGCGTTGGCAAGTACGTTATATCACTTACCCACGCTCGATTCAGAACCCCAGGAGTAA\n+ATGCTCGTTTTAAGAGGTTGGGATACACAGGCATGGTGTGATTACTGTCGGTGGTTTTTA\n+CGGTTCGCTTAAAAAAGCGGTAACCAATGATGCCATTAAAGCGAAGGATAGCGCGTAAAC\n+GTTTGACACCAACTTTCACTCCACGGTGTTGCATCAAAGC\n+>HWI-M00234:263:000000000-ADM55:1:1101:5068:6755 1:N:0:ATCACG\n+GCAGCGGCAGAACCAAAGCCATCAACCGTTGCCTTATCCTCTAAATTTTCACCCGCGCCA\n+CGAGGCTGACACGAACTATCCCCGATTTGACTGCACCACTTTATCGGAATGCTTCGGAGC\n+AACAGCTTCCGAGTGATGTCACGTCCCAGCACCTGGTGCCGGGATTAAGCTTCAATCTAC\n+TATACTTCGATTAAGCAGCGAGAGCGTAACGAGTTTCGCCAGATAAAATTTTGAGGACTG\n+AGATTAAAGTGCAAATC\n+>HWI-M00234:263:000000000-ADM55:1:1101:21477:6796 1:N:0:ATCACG\n+CCCCACTGCTGCTTCCCGTAGGAGTTTGGACCGTGTCTCAGTTCCAATGTGGGGGACCTT\n+CCTCTCAGAACCCCTATCCATCGTTGACTAGGTGGGCCGTTACCCCGCCTACTATCTAAT\n+GGAACGCATCCCCATCGTCTACCGGAAAATACCTT\n+>HWI-M00234:263:000000000-ADM55:1:1101:12483:6880 1:N:0:ATCACG\n+CTCCTTGTCTGCCTTCGATTACGGCATTTGCAATCGTTCCTGTAAGTAGTTTTACAGCTC\n+TAATGGCATCGTCATTCCCTGGTATTACGTAGTCTATTTCATCAGGATCACAGTTAGTAT\n+CAACTATTGCTACAACTGGTATTCCTAATATCTTCGCTTCATTTACTGCAATTCTTTCTT\n+TTCTTGGATCTACAACAAATAATACATCAGGAAGACCACCCATATCTTGAATGCCACCTA\n+AAAACTTTTCAA\n+>HWI-M00234:263:000000000-ADM55:1:1101:27958:6935 1:N:0:ATCACG\n+CCATTCGGAAATCCGCGGATCAAAGGTCATTTGCACCTACCCGCAGCTTATCGCAGCTTA\n+TCACGTCCTTCATCGCCTCCGAGAGCCAAGAGATCGGAAGAGCACACGTCTGAACTCCAG\n+TCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAACCCACAAATTCACACT\n+ACACACCCACCTCCCATCACGCATCTCTTTTGTCCGAGTCACGCTGCACGCTACCTGCAC\n+ACTACCTGCCTCAGTTACATCTTTTTACTCAATGTCCCACCATTTATCAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:6151:7404 1:N:0:ATCACG\n+GCACGTAGTTAGCCGGGGCTTCCTCCTAAGGTACCGTCATTATCGTCCCTTAGGACAGAG\n+CTTTACGATCCGAAAACCTTCATCGCTCACGCGGCGTCGCTGCATCAGAGTTCCCTCCAT\n+TGTGCAATATTCCCCACTGCTGCCTCCCGTAGGAGTCTGGACCGTGTCTCAGTTCCAGTG\n+TGGCCGTTCACCCTCCCAGGCCGGCTACCCATCGTCGCCTTGGTTAGCTATTACCTCCCC\n+AACTAGCTAATGGGACGCGAGTCCAGCTTATACCACTTAACCGCTTTTGCTTGAAAATCA\n'
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/information_lenght_fasta_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/information_lenght_fasta_output.txt Tue Apr 26 08:18:18 2016 -0400
b
@@ -0,0 +1,37 @@
+length
+86
+141
+300
+300
+116
+63
+53
+194
+217
+254
+273
+242
+261
+95
+106
+192
+200
+148
+233
+242
+271
+284
+295
+199
+168
+225
+139
+203
+75
+239
+220
+257
+155
+252
+290
+300
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/input_sequence_file.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sequence_file.fasta Tue Apr 26 08:18:18 2016 -0400
b
b'@@ -0,0 +1,174 @@\n+>HWI-M00234:263:000000000-ADM55:1:1101:21704:2233 1:N:0:ATCACG\n+GTGTACAAGGCCCGGGAACGTATTCACCGCGGCATGCTGATCCGCGATTACTAGCGATTC\n+CAACTTCATACAGGCGGGTTTCAGCC\n+>HWI-M00234:263:000000000-ADM55:1:1101:21334:2699 1:N:0:ATCACG\n+CTTTTGACGGCCTTTTTGTTGTCTTCTGAATTTTGTTTTTTTCGGTTGTAACATAATTCC\n+TAAAAATCAAATTCGTTAGCGATTATTTTTCTTTCTTTTGAAGTTCTTTCCGCCATTGTT\n+TCCGCTATTGCTTCCACGACC\n+>HWI-M00234:263:000000000-ADM55:1:1101:13968:2885 1:N:0:ATCACG\n+CCCACTGCTGCCTCCCGTAGGAGCCTGGACCGTGTCTCAGTTCCAATGTGGCCGATCACC\n+CTCTCAGGTCGGCTACCCATCGTTGCCCTGGTAAGCCATTACCCTACCAACTAGCTAATG\n+GGACGCGGGCACATCTCAAAGCGGATTACCCCTTTGGTTAAAGTTTAATGCGAAACTCTA\n+ACTTTATGCGTTATTAACCCTCCTTTCGGAGGGCTATTCCCCTCTTTGATGCAGGTTGCC\n+CACGTGTTACTCACCCGTCCGCCGCTAATCCACTTCCCGAAGGCAGATTCAACCCTCGAT\n+>HWI-M00234:263:000000000-ADM55:1:1101:11873:2948 1:N:0:ATCACG\n+CCCCGTTACATCTTCCGCGCAGGAAGACTCGATCAGTGAGCTATTACGCTTTCTTTGAAG\n+GATGGCTGCTTCTAAGCCAACTTCCTGACTGTCTTAGCCTTCCCACTTCGTTTTCCACTT\n+AGTCAATATTAGGGACCTTAGCCGGCGGTCTGGGTTGTTTCCCTCTTGAGTCCGGACGTT\n+AGCACCCGGTGCTCTGTCCCCCGTGCCCAAACTTCCAAGTATTCGGAGTTTGCCATGGTT\n+TGGTAAGACGCTATGTCCCCCTAACCATAACAGTGTTCTACCCTCTGAACACATACTCGA\n+>HWI-M00234:263:000000000-ADM55:1:1101:7855:3066 1:N:0:ATCACG\n+CGTCAATCTTCCAACGCCCACGGCAGATAGGGACAAAACTGTCTCACGACGTTTTAAACC\n+CAGCTCACGTACCTCTTTAAATGGCGAACAGCCATACCCTTGGGACCGGCTACAGC\n+>HWI-M00234:263:000000000-ADM55:1:1101:24817:3238 1:N:0:ATCACG\n+CCCGTGACCAGATTCATGGCCGCCGGTTAGAACCCCAGTACTGTCAGGGTGGTATCCCAA\n+GGA\n+>HWI-M00234:263:000000000-ADM55:1:1101:17179:3527 1:N:0:ATCACG\n+GTGCTTATTTCACTTCCTCAAAATCAGCATCCTGCACATTATCGCCATGCTTT\n+>HWI-M00234:263:000000000-ADM55:1:1101:21254:3571 1:N:0:ATCACG\n+GTCGCGGGGAAGATCAACTCCTGCAATTCGAGCCATACCAATTACCTCCTAGCGCCCTGA\n+CGCTGTTTATGACGAGGATCGCGGCTGCAGATGACGCGAATGACGCCATTGCGCTTGATC\n+ACTCGGCAATACTCACAAATGGGTTTAACAGATTATCTAACCTTCATAGTTTGCACACTC\n+CTCATGGACATTCC\n+>HWI-M00234:263:000000000-ADM55:1:1101:16569:3709 1:N:0:ATCACG\n+CCCGTCAATTCCTTTGAGTTTCATTCTTGCGAACGTACTCCCCAGGTGGAATACTTATTG\n+CGTTTGCTGCGGCACCGAATGGCTTTGCCACCCGACACCTAGTATTCATCGTTTACGGCG\n+TGGACTACCAGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGAGCCTCAACGTCAGGA\n+TCGGAAGAGCACACGTCTGAACTCCAGTCACATCCCG\n+>HWI-M00234:263:000000000-ADM55:1:1101:22403:3828 1:N:0:ATCACG\n+CCCGGACATCTTCGGCGCAGGATCTCTTGACTAGTGAGCTATTACGCACTCTTTAAATGA\n+GTGGCTGCTTCTAAGCCAACATCCTAGTTGTCTTAGAAATCCCACATCCTTTTCCACTTA\n+ACTTACACTTTGGGACCTTAGCTGATGAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n+CATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAAATATCAATCAAACACAC\n+ACACACCAGCTTAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:7508:4067 1:N:0:ATCACG\n+GGTGCACTAGGATCGTAGTTGGCTACTTTCCCGTTTTCAATGTATACGCAAGGTACACGG\n+TCAGCGGTAGCTGCCATAATGTAATGGTAGTCAAATCCTAAGTCTCCCAATGCACATGGC\n+AGTGGAGCGTTCCAATCTTGTGTACCGGCTTTGTCTCCTAATCCCAAGTGCCATTTTCCG\n+ATAGCTCCGGTAGCATAACCGGTACTTTTGAACATAACCGCCATGGTATATTGTTCGGGA\n+CGGATAATCATTCCGGCATTTCCGGCAGCTACG\n+>HWI-M00234:263:000000000-ADM55:1:1101:26232:4161 1:N:0:ATCACG\n+CTCTTCTTAAGCTCGAACAGCTTCTTCAGTCTTACCTGCCATTTCATCTAAAATTTTTAA\n+AGCTCGCTCTAAAACAGTAGTATCATCAATCATAACCAATCCACCATCTGGTCCAGGTTC\n+CAAGTGAATCCCAACACTTTTCCCCTCCTTAAAATTGTGTCCACCGAAAAAATTTCTTAC\n+AGTGTCAACATTCAGTCCAAGTTCATCGGCTATTCTATGCATACTACCACTAGGCAATGA\n+GT\n+>HWI-M00234:263:000000000-ADM55:1:1101:23151:4189 1:N:0:ATCACG\n+GCGGATTTTCTCGGGAGTATGATTACCCACACTATTGGATTCTTCCGAAGAAGACTCCAT\n+ACTACCAAGTTCAGCTCGGATGGTGGATTTGCCTGCCAAGATCAGAAGAGCACACGTCTG\n+AACTCCAGTCACATCACGACTTCGTATGCCGTATTCTGCTTGAAAAAAACAATGAAAACT\n+AGTCACCGATGACTCACGTTCGCTGAACATTATCTGACAACTAGCTTAACAATGGACATG\n+CTTAGCACTGACAACCAGACG\n+>HWI-M00234:263:000000000-ADM55:1:1101:17148:4803 1:N:0:ATCACG\n+CCCTTTTCGGCAATGGCGATGTTACGGTACATAAGAGCGATTTCGGGGAAACCTTCTTCG\n+TCTGCAATATCAGCAAACTTAGGATAATCCAATGA\n+>HWI-M00234:263:000000000-ADM55:1:1101:8939:4808 1:N:0:ATCACG\n+CCACAGTTCTGTCATATCGCAGTACAGGAATCTCAACCTGTTGTCCATCGGCTACGGCTC\n+TCGCCCTCGCCTTAGGCCCCGACTTACCCAGGGCAGATCAGCTTTA\n+>HWI-M00234:263:000000000-ADM55:1:1101:11727:5103 1:N:0:ATCACG\n+CTCCCGTATAGGATGGATCGAAAGCAATCGTCAGGAATCCACGTTCTGCCAATGTCTGCG\n+CATACAAGCCCGAAGCTTGTTCCTTCACGGCACCAAATGGGCCACTGATGGCAATGGCAG\n+GCAATTTACCCGTCACATTCTTGGGTACAT'..b'0000-ADM55:1:1101:26932:5861 1:N:0:ATCACG\n+AGCGTCAGTTACAGTCCAGAAAGCCGCCTTCGCCACTGGTGTTCTTCCTAATCTCTACGC\n+ATTTCACCGCTACACTAGGAATTCCGCTTTCCTCTCCTGCACTCTAGATATCCAGTTTGG\n+AATGCAGCCCCCAGGTTAAGCCCGGGGATTTCACATCCCACTTAAACATCCGCCTACGCA\n+CCCTTTACCACCAGTAAATCCGGACAACGCTCGCCACCTACGTATTACCGCGGCTGCTGG\n+CACGTAGTTAGCCGTGGCTTCCTCCTCTGGTACCTTCATTATCGTCCCCGAAACC\n+>HWI-M00234:263:000000000-ADM55:1:1101:14154:5990 1:N:0:ATCACG\n+GTGGGATGTCAAGGTAAGCGGCGGCAGCCTTGGCTACAGCAAGAGATACGCCGAGGATGG\n+CATTGGCACCCAGATTGGCTTTCGTTTTGGTTCCGTCCAGTGCAAGCATGGCATGGTCGA\n+TACCCATCTGGTCGAGGGCGGACATACCGATGAGCTTGGGAGCAATGATATTATTGATAT\n+TGTCCACGGCTTTCAGGAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:25022:6229 1:N:0:ATCACG\n+CCCGAGAACGTATTCACCGCAACATTGCTGATTTGCGATTACTAGTGATTCCAACTTCAT\n+GTACTCGAGTTGCAGAGTACAATCCGAACTACGAACAGCTTTCTGAGGTTTGCTCCTCCT\n+CGCAGATTTGCTGATCGGAAGAGCACACGTCTGAACTCCAGTCACATC\n+>HWI-M00234:263:000000000-ADM55:1:1101:21023:6286 1:N:0:ATCACG\n+GTGTACAAGGCCCGGGAACGTATTCACCGCGCCGTGGCTGATGCGCGATTACTAGCGAAT\n+CCAGCTTCATGGAGTCGGGTTGCAGACTCCAATCCGAACTGAGAGAGGCTTTAGGGATTA\n+GCATCACGTCGCCGTGTAGCTGCCTTCTGTACCCCCCATTGTAACACGTGTGTAGCCCCG\n+GACGTAAGGGCCGTGCTGATTTGCCGTCATCCCCACCTTCCTCAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:8138:6336 1:N:0:ATCACG\n+TGTGTACAAGGCCCGGGAACGTATTCACCGCGACATTCTGATTCGCGATTACTAGCAACT\n+CCAGCTTCATGTAGGCGAGTTGCAGCCTACAATCCGAACTGAGATAGGTTTTATAAGTTT\n+TGCTCCACCTCACGGTCTT\n+>HWI-M00234:263:000000000-ADM55:1:1101:11055:6543 1:N:0:ATCACG\n+CCTGGACATTTTCGGCGCAGGACCTCCCGACCAGTGAGCTATTACGCACTCTTTGACTGT\n+GTGGCTGCTTCTGAGCCAACATCCTGGTTGACTTTGAAATCTCACATACTTTTCCACTTA\n+ACACGCACTTTGGGACCTTAGCTGGAGGTCTTGGCTCTTTCCTTTTTGCCTATCCAACTT\n+ATCTCGTATAGTCTGACTCCCGG\n+>HWI-M00234:263:000000000-ADM55:1:1101:24980:6575 1:N:0:ATCACG\n+GCCATTTATCTACGCCTGTTGTCACCAACAGGCTCTAGCGGTCTACCCTCCGACGTGGGG\n+CGAGCAACCCTCATA\n+>HWI-M00234:263:000000000-ADM55:1:1101:7596:6682 1:N:0:ATCACG\n+GCCCCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATACTTA\n+ACGCTTTCGCTTGGCGGCTGACAATGTATCGCCAACCGCGAGTATTCATCAGATCGGAAG\n+AGCACACGTCTGAACTCCAGTCACATCCCGATCTCGTATGCCGTCCTCGGCTTGAAAAAA\n+AAAAATATAAACCATGATGAATTTCGAACACTCACGACCACACACTAAGCGTCGACGTA\n+>HWI-M00234:263:000000000-ADM55:1:1101:24419:6736 1:N:0:ATCACG\n+CGGGCGTTGGCAAGTACGTTATATCACTTACCCACGCTCGATTCAGAACCCCAGGAGTAA\n+ATGCTCGTTTTAAGAGGTTGGGATACACAGGCATGGTGTGATTACTGTCGGTGGTTTTTA\n+CGGTTCGCTTAAAAAAGCGGTAACCAATGATGCCATTAAAGCGAAGGATAGCGCGTAAAC\n+GTTTGACACCAACTTTCACTCCACGGTGTTGCATCAAAGC\n+>HWI-M00234:263:000000000-ADM55:1:1101:5068:6755 1:N:0:ATCACG\n+GCAGCGGCAGAACCAAAGCCATCAACCGTTGCCTTATCCTCTAAATTTTCACCCGCGCCA\n+CGAGGCTGACACGAACTATCCCCGATTTGACTGCACCACTTTATCGGAATGCTTCGGAGC\n+AACAGCTTCCGAGTGATGTCACGTCCCAGCACCTGGTGCCGGGATTAAGCTTCAATCTAC\n+TATACTTCGATTAAGCAGCGAGAGCGTAACGAGTTTCGCCAGATAAAATTTTGAGGACTG\n+AGATTAAAGTGCAAATC\n+>HWI-M00234:263:000000000-ADM55:1:1101:21477:6796 1:N:0:ATCACG\n+CCCCACTGCTGCTTCCCGTAGGAGTTTGGACCGTGTCTCAGTTCCAATGTGGGGGACCTT\n+CCTCTCAGAACCCCTATCCATCGTTGACTAGGTGGGCCGTTACCCCGCCTACTATCTAAT\n+GGAACGCATCCCCATCGTCTACCGGAAAATACCTT\n+>HWI-M00234:263:000000000-ADM55:1:1101:12483:6880 1:N:0:ATCACG\n+CTCCTTGTCTGCCTTCGATTACGGCATTTGCAATCGTTCCTGTAAGTAGTTTTACAGCTC\n+TAATGGCATCGTCATTCCCTGGTATTACGTAGTCTATTTCATCAGGATCACAGTTAGTAT\n+CAACTATTGCTACAACTGGTATTCCTAATATCTTCGCTTCATTTACTGCAATTCTTTCTT\n+TTCTTGGATCTACAACAAATAATACATCAGGAAGACCACCCATATCTTGAATGCCACCTA\n+AAAACTTTTCAA\n+>HWI-M00234:263:000000000-ADM55:1:1101:27958:6935 1:N:0:ATCACG\n+CCATTCGGAAATCCGCGGATCAAAGGTCATTTGCACCTACCCGCAGCTTATCGCAGCTTA\n+TCACGTCCTTCATCGCCTCCGAGAGCCAAGAGATCGGAAGAGCACACGTCTGAACTCCAG\n+TCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAACCCACAAATTCACACT\n+ACACACCCACCTCCCATCACGCATCTCTTTTGTCCGAGTCACGCTGCACGCTACCTGCAC\n+ACTACCTGCCTCAGTTACATCTTTTTACTCAATGTCCCACCATTTATCAC\n+>HWI-M00234:263:000000000-ADM55:1:1101:6151:7404 1:N:0:ATCACG\n+GCACGTAGTTAGCCGGGGCTTCCTCCTAAGGTACCGTCATTATCGTCCCTTAGGACAGAG\n+CTTTACGATCCGAAAACCTTCATCGCTCACGCGGCGTCGCTGCATCAGAGTTCCCTCCAT\n+TGTGCAATATTCCCCACTGCTGCCTCCCGTAGGAGTCTGGACCGTGTCTCAGTTCCAGTG\n+TGGCCGTTCACCCTCCCAGGCCGGCTACCCATCGTCGCCTTGGTTAGCTATTACCTCCCC\n+AACTAGCTAATGGGACGCGAGTCCAGCTTATACCACTTAACCGCTTTTGCTTGAAAATCA\n'
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/input_sequence_file.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sequence_file.fastq Tue Apr 26 08:18:18 2016 -0400
b
b'@@ -0,0 +1,144 @@\n+@HWI-M00234:263:000000000-ADM55:1:1101:21704:2233 1:N:0:ATCACG\n+GTGTACAAGGCCCGGGAACGTATTCACCGCGGCATGCTGATCCGCGATTACTAGCGATTCCAACTTCATACAGGCGGGTTTCAGCC\n++\n+BCCCCGGED8DFGC::+@C@9@,,EGFAF@CC7@@@FGAF<@FGGEGD7:<6EDDECCFGGCFGGC<66,F<FFCGFC4BFF?5?<\n+@HWI-M00234:263:000000000-ADM55:1:1101:21334:2699 1:N:0:ATCACG\n+CTTTTGACGGCCTTTTTGTTGTCTTCTGAATTTTGTTTTTTTCGGTTGTAACATAATTCCTAAAAATCAAATTCGTTAGCGATTATTTTTCTTTCTTTTGAAGTTCTTTCCGCCATTGTTTCCGCTATTGCTTCCACGACC\n++\n+CCCCCGFGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGG@FGGGGGGGGEADGGGGGGGGGGGGGFGGGGFF@CFFEGGDGGGGGFGGGGGGGGFGFCFGGGGGFGCE=FGFDGFGGCEEGGGGDFGGGFGGGGGC\n+@HWI-M00234:263:000000000-ADM55:1:1101:13968:2885 1:N:0:ATCACG\n+CCCACTGCTGCCTCCCGTAGGAGCCTGGACCGTGTCTCAGTTCCAATGTGGCCGATCACCCTCTCAGGTCGGCTACCCATCGTTGCCCTGGTAAGCCATTACCCTACCAACTAGCTAATGGGACGCGGGCACATCTCAAAGCGGATTACCCCTTTGGTTAAAGTTTAATGCGAAACTCTAACTTTATGCGTTATTAACCCTCCTTTCGGAGGGCTATTCCCCTCTTTGATGCAGGTTGCCCACGTGTTACTCACCCGTCCGCCGCTAATCCACTTCCCGAAGGCAGATTCAACCCTCGAT\n++\n+<CC8CEFDGGFDF<,6@F7FCGF,:FFCED@@@,;;,<CFFFA,6C@FC@,<E+87FFCFCGFF8,<<FG7+@F7FEFFGGGCFECF,=<B=EF99F@9<FAFFGDF?=E:=4:,9,,CF9A4,>8>C+>3BFGD;@,7>,==>6?=9,,@+@4<=699C9,@+@<+72;@5;0*9:?82BF85=@2=C###############################################################################################################\n+@HWI-M00234:263:000000000-ADM55:1:1101:11873:2948 1:N:0:ATCACG\n+CCCCGTTACATCTTCCGCGCAGGAAGACTCGATCAGTGAGCTATTACGCTTTCTTTGAAGGATGGCTGCTTCTAAGCCAACTTCCTGACTGTCTTAGCCTTCCCACTTCGTTTTCCACTTAGTCAATATTAGGGACCTTAGCCGGCGGTCTGGGTTGTTTCCCTCTTGAGTCCGGACGTTAGCACCCGGTGCTCTGTCCCCCGTGCCCAAACTTCCAAGTATTCGGAGTTTGCCATGGTTTGGTAAGACGCTATGTCCCCCTAACCATAACAGTGTTCTACCCTCTGAACACATACTCGA\n++\n+CCCCCCD;B6C,;<EAFECF@FGGGGAECEG@F@FGGGFAAF9FGGCF>F7<FCFF<6FEFFFCDCFGGGEC<F,<A<EFC@F9BFGE<9DFFGEEGG@FFGFFGF8FDEFC<F,94:EF9,C=EGGGCCFCCE>B+>E9FCGG6+@CC>EFC8D>CEF,E=@=E?C,6=AFG>>9?DCGC>FG>GGCDA5*;+=AFF*A;/?B)-)(;64:)>3<E<3))7<E3=;>FDF?E<<4)3;EF@FFA#######################################################\n+@HWI-M00234:263:000000000-ADM55:1:1101:7855:3066 1:N:0:ATCACG\n+CGTCAATCTTCCAACGCCCACGGCAGATAGGGACAAAACTGTCTCACGACGTTTTAAACCCAGCTCACGTACCTCTTTAAATGGCGAACAGCCATACCCTTGGGACCGGCTACAGC\n++\n+CCCCCGGGGGGGGFGGGCBCFGGGGGFGGGGGGGGGGGGGGGG,CEGGGGGGGGGGGGGG@F@FGGGGGGGG@@EGFGGGFGFGCGEGGGGGGGGGGCFGGGGEGEEE7FFGFFGD\n+@HWI-M00234:263:000000000-ADM55:1:1101:24817:3238 1:N:0:ATCACG\n+CCCGTGACCAGATTCATGGCCGCCGGTTAGAACCCCAGTACTGTCAGGGTGGTATCCCAAGGA\n++\n+CCCCC7FGFGEGGGGGGDFGGGGGGDGGGGGGGGGGGGGGGGGGGGGFEECGGGGGGFFGGGG\n+@HWI-M00234:263:000000000-ADM55:1:1101:17179:3527 1:N:0:ATCACG\n+GTGCTTATTTCACTTCCTCAAAATCAGCATCCTGCACATTATCGCCATGCTTT\n++\n+CCCCCG<FCCGGFGGFGGGGGGGGFGGGGGGDEG?<ECGCFEG<EGGGF,CFG\n+@HWI-M00234:263:000000000-ADM55:1:1101:21254:3571 1:N:0:ATCACG\n+GTCGCGGGGAAGATCAACTCCTGCAATTCGAGCCATACCAATTACCTCCTAGCGCCCTGACGCTGTTTATGACGAGGATCGCGGCTGCAGATGACGCGAATGACGCCATTGCGCTTGATCACTCGGCAATACTCACAAATGGGTTTAACAGATTATCTAACCTTCATAGTTTGCACACTCCTCATGGACATTCC\n++\n+CCCCCFEFGGGGGGGGGGGGGGGGGGGGGGGGGGGG<FCCGGFFGGDFGGGFGGCGG7FGGGGGCGGAFFEEGC:FGGGGGGGGGDFGGFAGFGGGG7FEGGGGGGE7FGGG>EGGGFFFCFGGFCEEGGFGCGGGFGFDGFFGGCFGGG7C;,5=EF,CG59DCCG@,5CFFFG=FC*CFFGGGDFGGAFFFF\n+@HWI-M00234:263:000000000-ADM55:1:1101:16569:3709 1:N:0:ATCACG\n+CCCGTCAATTCCTTTGAGTTTCATTCTTGCGAACGTACTCCCCAGGTGGAATACTTATTGCGTTTGCTGCGGCACCGAATGGCTTTGCCACCCGACACCTAGTATTCATCGTTTACGGCGTGGACTACCAGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGAGCCTCAACGTCAGGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCCCG\n++\n+CCCCCGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGG9FGGGGDGGGGGGGECGGGGGEGCGGGGGGGDGGGFGGGGGFGGGGGGFGGGGGGGGGGGACFGGGFGGGGGFGGGGGCFGGGGGGFFFFFFEF:65CFFF8FEF7AFEFEE(,6\n+@HWI-M00234:263:000000000-ADM55:1:1101:22403:3828 1:N:0:ATCACG\n+CCCGGACATCTTCGGCGCAGGATCTCTTGACTAGTGAGCTATTACGCACTCTTTAAATGAGTGGCTGCTTCTAAGCCAACATCCTAGTTGTCTTAGAAATCCCACATCCTTTTCCACTTAACTTACACTTTGGGACCTTAGCTGATGAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAAATATCAATCAAACACACACACACCAGCTTAC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGEGGGGGGGGFGGGGGGGGGGGGGGGGFGGGGG'..b'GGGGGG@G<FFFGGGEEGGGGGGFGGCECFGGGGGD:@FGGGGG7ADGGG\n+@HWI-M00234:263:000000000-ADM55:1:1101:7596:6682 1:N:0:ATCACG\n+GCCCCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATACTTAACGCTTTCGCTTGGCGGCTGACAATGTATCGCCAACCGCGAGTATTCATCAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCCCGATCTCGTATGCCGTCCTCGGCTTGAAAAAAAAAAATATAAACCATGATGAATTTCGAACACTCACGACCACACACTAAGCGTCGACGTA\n++\n+CCCCCGGGGGGACFGGGGGFFEEGGGGGGFFEGGGGGGGGGGEGFGGGGEFG@FGDFFGGGGGGGEGGGDGFFCC>FGCDFGFGGGCE,E:FCCCC7CGGGGGGFE;FFEFCCGFGG@CEFGGGG@F@>@8DGGG=;@FFFGGC;E6,@@+4:,318;9,@FG@###########################################################################\n+@HWI-M00234:263:000000000-ADM55:1:1101:24419:6736 1:N:0:ATCACG\n+CGGGCGTTGGCAAGTACGTTATATCACTTACCCACGCTCGATTCAGAACCCCAGGAGTAAATGCTCGTTTTAAGAGGTTGGGATACACAGGCATGGTGTGATTACTGTCGGTGGTTTTTACGGTTCGCTTAAAAAAGCGGTAACCAATGATGCCATTAAAGCGAAGGATAGCGCGTAAACGTTTGACACCAACTTTCACTCCACGGTGTTGCATCAAAGC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGFGGGGGGGDGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGGGGGGGD9FFCFEGGGFGGGGG7=+4>@EFEEFFG7>EEDEEEFDDDG@8DCFEFDFGGC8=CDCGGFGGDC6>DDGDCFDGDGDGFGG@AFBA?FFFFFFFFABBF95B@AFFFFFFFF\n+@HWI-M00234:263:000000000-ADM55:1:1101:5068:6755 1:N:0:ATCACG\n+GCAGCGGCAGAACCAAAGCCATCAACCGTTGCCTTATCCTCTAAATTTTCACCCGCGCCACGAGGCTGACACGAACTATCCCCGATTTGACTGCACCACTTTATCGGAATGCTTCGGAGCAACAGCTTCCGAGTGATGTCACGTCCCAGCACCTGGTGCCGGGATTAAGCTTCAATCTACTATACTTCGATTAAGCAGCGAGAGCGTAACGAGTTTCGCCAGATAAAATTTTGAGGACTGAGATTAAAGTGCAAATC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFF?FGGGGGDFGGGGGGGGGGFFFFGGGGGFGGFGGGGGGDGGGGGGGFGGGEEDGGG9C9EEGGGGGD8@FGGGGGGGGGGGGG>CFGGGGGCFGDGDFGFGGF7CFGDF>FFFAF=;A>EDDBFBBFFF@D@D<E?@F01;@<?C<471.2)48<??2(549<B>FDF24)66<))\n+@HWI-M00234:263:000000000-ADM55:1:1101:21477:6796 1:N:0:ATCACG\n+CCCCACTGCTGCTTCCCGTAGGAGTTTGGACCGTGTCTCAGTTCCAATGTGGGGGACCTTCCTCTCAGAACCCCTATCCATCGTTGACTAGGTGGGCCGTTACCCCGCCTACTATCTAATGGAACGCATCCCCATCGTCTACCGGAAAATACCTT\n++\n+CC,AC:F8<F@CCEDGGCEGGGGGFAG<FGG8@FEFEFFCEGGCFF<<<F@<C+@FF:AFCEFE,ECF9AFGC@DE<FE,,5@9B8BFFCE8F,=FF@CG7FCFGD:FFF7EAFDG9F;FFF@:FEEG@4=@DEFG6=EGCADC6@ECCA,@DDF\n+@HWI-M00234:263:000000000-ADM55:1:1101:12483:6880 1:N:0:ATCACG\n+CTCCTTGTCTGCCTTCGATTACGGCATTTGCAATCGTTCCTGTAAGTAGTTTTACAGCTCTAATGGCATCGTCATTCCCTGGTATTACGTAGTCTATTTCATCAGGATCACAGTTAGTATCAACTATTGCTACAACTGGTATTCCTAATATCTTCGCTTCATTTACTGCAATTCTTTCTTTTCTTGGATCTACAACAAATAATACATCAGGAAGACCACCCATATCTTGAATGCCACCTAAAAACTTTTCAA\n++\n+CCCCCGGGGGGGFGGFGGGGGFGDFEGGGGGFFFGGFGGGGGGGGFGGGFGGGGGGGGCEGGGGCFGGGGGGGGGGGGFGGGFEGGDGGGGFFFGGGGGGGGFGGGGGGGGGGFGFGCF@FEFFGDAGFGFGGGGFGGGGDGGGFFGGGD;EEFGGGGGGGFGGGGGGGGGGFDGDGGGGGGGGGGGDGFFFFFFF9;;DFFFFFBFFBDBFFBFD=DDFDFFFEFEFEFEECE<@E@CFF@EBAFFEEEFF\n+@HWI-M00234:263:000000000-ADM55:1:1101:27958:6935 1:N:0:ATCACG\n+CCATTCGGAAATCCGCGGATCAAAGGTCATTTGCACCTACCCGCAGCTTATCGCAGCTTATCACGTCCTTCATCGCCTCCGAGAGCCAAGAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAACCCACAAATTCACACTACACACCCACCTCCCATCACGCATCTCTTTTGTCCGAGTCACGCTGCACGCTACCTGCACACTACCTGCCTCAGTTACATCTTTTTACTCAATGTCCCACCATTTATCAC\n++\n+CCCCCGGGGGGGGGGGDGGCFGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGGGGG8DC<FGGGFFGGGGGGGGGGCGF:FDGGGGGG@FEEEGGGEGDGGGFGGGGGG:FFGG=EGFFFCEFG9EEFGD+@@EEDED8DA>EEC@EFGFAFFGF=F?+44;C##############################################################################################################################\n+@HWI-M00234:263:000000000-ADM55:1:1101:6151:7404 1:N:0:ATCACG\n+GCACGTAGTTAGCCGGGGCTTCCTCCTAAGGTACCGTCATTATCGTCCCTTAGGACAGAGCTTTACGATCCGAAAACCTTCATCGCTCACGCGGCGTCGCTGCATCAGAGTTCCCTCCATTGTGCAATATTCCCCACTGCTGCCTCCCGTAGGAGTCTGGACCGTGTCTCAGTTCCAGTGTGGCCGTTCACCCTCCCAGGCCGGCTACCCATCGTCGCCTTGGTTAGCTATTACCTCCCCAACTAGCTAATGGGACGCGAGTCCAGCTTATACCACTTAACCGCTTTTGCTTGAAAATCA\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGDF8FGGGFGGGGEGGG7+CGGFGGGGGGGGC:8FGGGDFGFEGGGFGGGFGCGGGGGGGGDG?EGGGGGFF,<FGGGGFDGGGG,5,EFGGGD++*5:DGFCDD?6>AFFF4?6)5?EB?D0>DFFFAF0(/.6<D03;3().646<A)).))(4:0<BFBD?F<2<AFF<B############################################\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/report_illumina_1_3_fastq_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report_illumina_1_3_fastq_output.txt Tue Apr 26 08:18:18 2016 -0400
b
@@ -0,0 +1,4 @@
+Information to extract:
+ sequences
+Number of similarity records: 36
+Number of extracted similarity records: 36
b
diff -r 000000000000 -r 01c2b74b3a21 test-data/report_length_fasta_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report_length_fasta_output.txt Tue Apr 26 08:18:18 2016 -0400
b
@@ -0,0 +1,4 @@
+Information to extract:
+ length
+Number of similarity records: 36
+Number of extracted similarity records: 36