Repository 'assemblystats'
hg clone https://toolshed.g2.bx.psu.edu/repos/nml/assemblystats

Changeset 0:ad2b274663f8 (2017-11-07)
Next changeset 1:7556309ffbaf (2020-05-29)
Commit message:
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 3f9ae719338c7c8db81d645b8ee09727e2d9ce23
added:
README_ASSEMBLY_STATS
assembly_stats_txt.py
assembly_stats_txt.xml
fasta_summary.pl
test-data/Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular
test-data/SRR1002850_SMALL.fasta
b
diff -r 000000000000 -r ad2b274663f8 README_ASSEMBLY_STATS
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README_ASSEMBLY_STATS Tue Nov 07 12:28:31 2017 -0500
b
@@ -0,0 +1,16 @@
+#Created 07/01/2011
+#Konrad Paszkiewicz, University of Exeter
+
+#Modified by Mariam Iskander and Matthew Gopez, October 13th, 2017
+
+Assembly stats
+
+This series of scripts calculates various metrics on an input FASTA file. It is typically most useful on either de novo genomic or transcriptomic assembly data.
+
+Prerequisites:
+
+1. The bundled Perl script fasta_summary.pl, which needs gnuplot installed to create the histograms
+
+Limitations:
+
+Each result is currently written as a separate output; ideally this should output a composite dataset of some sort instead.
b
diff -r 000000000000 -r ad2b274663f8 assembly_stats_txt.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/assembly_stats_txt.py Tue Nov 07 12:28:31 2017 -0500
[
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Version 1.01 - bugs kindly corrected by Jan van Haarst
+# Modified by Matthew Gopez October 13th, 2017
+
+import logging
+import os
+import subprocess
+import sys
+
+
+log = logging.getLogger(__name__)
+
+assert sys.version_info[:2] >= (2, 4)
+
+
+def stop_err(msg):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit()
+
+
+def __main__():
+
+    # Parse Command Line
+
+    working_dir = sys.argv[2]
+    type = sys.argv[3]
+    bucket = sys.argv[4]
+    input = sys.argv[5]
+    stats = sys.argv[6]
+    sortedcontigs = sys.argv[7]
+    histogrampng = sys.argv[8]
+    summedcontigspng = sys.argv[9]
+    histogramdata = sys.argv[10]
+    summedcontigdata = sys.argv[11]
+    try:  # for test - needs this done
+        os.makedirs(working_dir)
+    except Exception, e:
+        stop_err('Error running assembly_stats_txt.py ' + str(e))
+
+    cmdline = '%s/fasta_summary.pl -i %s -t %s %s -o %s > /dev/null' \
+        % (os.path.dirname(sys.argv[0]), input, type, bucket,
+           working_dir)
+    try:
+        proc = subprocess.Popen(args=cmdline, shell=True,
+                                stderr=subprocess.PIPE)
+        returncode = proc.wait()
+
+        # get stderr, allowing for case where it's very large
+
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += proc.stderr.read(buffsize)
+                if not stderr or len(stderr) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        if returncode != 0:
+            raise Exception
+    except Exception, e:
+        stop_err('Error running assembly_stats.py ' + str(e))
+
+    stats_path = os.path.join(working_dir, 'stats.txt')
+    sorted_contigs_path = os.path.join(working_dir, 'sorted_contigs.fa')
+    histogram_png_path = os.path.join(working_dir,
+                                      'histogram_bins.dat.png')
+    summed_contigs_path = os.path.join(working_dir,
+                                       'summed_contig_lengths.dat.png')
+    histogram_data_path = os.path.join(working_dir, 'histogram_bins.dat')
+    summed_contigs_data_path = os.path.join(working_dir,
+                                            'summed_contig_lengths.dat')
+
+    out = open(stats, 'w')
+    for line in open(stats_path):
+        out.write('%s' % line)
+    out.close()
+
+    out = open(sortedcontigs, 'w')
+    for line in open(sorted_contigs_path):
+        out.write('%s' % line)
+    out.close()
+
+    out = open(histogrampng, 'w')
+    for line in open(histogram_png_path):
+        out.write('%s' % line)
+    out.close()
+
+    out = open(summedcontigspng, 'w')
+    for line in open(summed_contigs_path):
+        out.write('%s' % line)
+    out.close()
+
+    out = open(histogramdata, 'w')
+    for line in open(histogram_data_path):
+        out.write('%s' % line)
+    out.close()
+
+    out = open(summedcontigdata, 'w')
+    for line in open(summed_contigs_data_path):
+        out.write('%s' % line)
+    out.close()
+
+
+if __name__ == '__main__':
+    __main__()
b
diff -r 000000000000 -r ad2b274663f8 assembly_stats_txt.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/assembly_stats_txt.xml Tue Nov 07 12:28:31 2017 -0500
[
@@ -0,0 +1,79 @@
+<tool id="assemblystats" name="assemblystats" version="1.0.1">
+ <description>Summarise an assembly (e.g. N50 metrics)</description>
+ <requirements>
+ <requirement type="package" version="1.6.924">perl-bioperl</requirement>
+ <requirement type="package" version="5.0.4">gnuplot</requirement>
+ </requirements>
+ <command detect_errors="exit_code"><![CDATA[
+ python $__tool_directory__/assembly_stats_txt.py 
+
+ '$type' 
+ '$stats.extra_files_path'
+ '$type'
+ '$bucket'
+ '$input'
+ '$stats'
+ '$sortedcontigs'
+ '$histogrampng' 
+ '$summedcontigspng'
+ '$histogramdata' 
+ '$summedcontigdata' 
+ ]]></command>
+ <inputs>
+ <param label="Type of read" name="type" type="select" help="Is this from an genomic (contig) or transcriptomic assembly (isotig) or are these raw reads (read)">
+ <option value="contig" selected="yes">Contig (if from genomic assembly)</option>
+ <option value="isotig">Isotig (if from transcriptomic assembly)</option>
+ <option value="read">Raw reads from sequencer in FASTA format (useful for 454 data)</option>
+ </param>
+ <param name="bucket" type="boolean" label="Output histogram with bin sizes=1" truevalue="-b" falsevalue="" help="Use this to specify whether or not bin sizes of 1 should be used when plotting histograms"/>
+ <param format="fasta" name="input" type="data" label="Source file in FASTA format"/>
+ <param name = "all_outputs" type ="boolean" checked="false" label="Return all output files" help="If checked, all output files will be displayed. If not checked, only the file 'Assembly Statistics' will be provided." />
+ </inputs>
+ <outputs>
+ <data format="tabular" name="stats" label="Assembly statistics - $input.display_name"/>
+ <data format="fasta" name="sortedcontigs" label="Sorted contigs - $input.display_name" >
+ <filter>all_outputs is True</filter>
+ </data>
+ <data format="png" name="histogrampng" label="Histogram of contig sizes - $input.display_name">
+ <filter>all_outputs is True</filter>
+ </data>
+ <data format="png" name="summedcontigspng" label="Cumulative sum of contig sizes - $input.display_name">
+ <filter>all_outputs is True</filter>
+ </data>
+ <data format="tabular" name="histogramdata" label="Histogram data - $input.display_name">
+ <filter>all_outputs is True</filter>
+ </data>
+ <data format="tabular" name="summedcontigdata" label="Cumulative sum of contig size data - $input.display_name">
+ <filter>all_outputs is True</filter>
+ </data>
+ </outputs>
+ <tests>
+ <test>
+ <param name="input" value="SRR1002850_SMALL.fasta"/>
+ <output name="stats" value="Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular"/>
+ </test>
+ </tests>
+ <help><![CDATA[
+ **Summarise assembly overview** 
+
+ This script is used to give summary statistics of an assembly or set of reads. Typically this is run after an assembly to evaluate gross features.
+
+
+ # Gives back
+ # - N50
+ # - num of contigs > 1 kb
+ # - num of contigs
+ # - Read or Contig Histogram and graphs.
+ # - Summed contig length (by number of contigs, in sorted order)
+ ]]>
+
+ </help>
+ <citations>
+ <citation type="bibtex">@ARTICLE{a1,
+ title = {Summarise an assembly (e.g. N50 metrics)},
+ author = {Konrad Paszkiewicz, Sujai Kumar, Mariam Iskander},
+ url = {https://github.com/phac-nml/galaxy_tools/}
+ }
+ }</citation>
+ </citations>
+</tool>
b
diff -r 000000000000 -r ad2b274663f8 fasta_summary.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_summary.pl Tue Nov 07 12:28:31 2017 -0500
[
b'@@ -0,0 +1,776 @@\n+#!/usr/bin/env perl\n+\n+#==============================================================================================\n+\n+# Script to output statistsics and histograms for reads and contigs/isotigs\n+\n+\n+# Outputs include:\n+#    Mean, N50, StdDev or reads or contig lengths,\n+#    Mean and Modal read or contig lengths.\n+#    Number of reads or contigs > 1 kb in length \n+#    Summed contig length (by number of contigs, in sorted order)\n+#    Histogram of read or contig lengths,\n+#    Graph of sums of read-lengths\n+#    File of reads or contigs sorted by read or contig length\n+#    Test for mono/di-nucelotide repeats\n+#    Randomly selected reads or contigs\n+\n+\n+# Needs gnuplot installed to create the histograms:\n+#   On Fedora/Redhat linux: sudo yum install gnuplot \n+#   On Ubuntu/Debian: sudo apt-get install gnuplot\n+\n+#  Uses a linux pipe to call gnu-plot directly, rather than as a separate shell script.\n+\n+# Original written by Sujai Kumar, 2008-09-05 University of Edinburgh\n+# Modified by Stephen: 29-Apr-2009:\n+# Last changed by Stephen: 9-Aug-2010\n+\n+\n+# Usage: fasta_summary.pl -i infile.fasta  -o process_reads -t read OR contig OR isotig (to use \'read\' or \'contig\' or \'isotig\' in the output table & graphs. 
Isotig is for \'runAssembly -cdna ...\' output file \'454Isotigs.fna\') [-r 1 to indicate count simple nucleotide repeats] [-n number of random reads to output] [-c cutoff_length] [-l 1 to indicate output the longest read] [-f (s or t or w) for spacer, tab or wiki format table output.]\n+\n+# Note: The parameters above in the [] are optional.\n+\n+# eg: fasta_summary.pl -i myfile.fasta  -o process_reads -t read\n+# Where:\n+#  -i reads or contigs as input, in fasta format.\n+#  -o output_dir (created if it doesn\'t exist)\n+#  -t read, contig or isotig\n+\n+# Gives back\n+# - N50\n+# - num of contigs > 1 kb\n+# - num of contigs\n+# - Read or Contig Histogram and graphs.\n+# - Summed contig length (by number of contigs, in sorted order)\n+\n+#==============================================================================================\n+\n+\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+\n+my $infile;\n+my $output_dir;\n+my $type=\'read\'; # Defaults to \'read\' at present\n+my $repeats=1;\n+my $num_random_reads_to_output=0;\n+my $cutoff_length=-1; # -1 means won\'t check this cutoff\n+my $longest_read=-1; # -1 mean\'s don\'t output the sequence for the longest read.\n+my $doCommify=1; # Outputs statistics numbers in format: 9,999,999\n+my $format="t";  # "s"=spaces between columns, "t"=tabs between columns, "w"=wiki \'||\' and \'|\'.\n+my $bucket1=0; # For optional exact length histogram distribution as asked for by JH.\n+\n+if ($#ARGV==-1) {die "\n+ Usage:  \n+\n+    fasta_summary.pl -i infile.fasta  -o output_dir  -t ( read | contig | isotig ) [ -r 0 ] [ -n num_reads ] [ -c cutoff_length ] [ -l 1 ] [ -d 0 ] [ -f (w | t ) ] [ -bucket1 ]\n+\n+ where:\n+\n+    -i or -infile  infile.fasta  :  input fatsa file of raeds, contigs or isotigs, \n+\n+    -o or -output_dir output_directory : directory to put output stats and graphs into.\n+\n+    -t or -type (read or contig or isotig) : for displaying the graph title, where type is \'read\' or \'contig\' or 
\'isotig\'.\n+\n+    -r or -repeats 0 or 1        :  1=count number of reads that contain over 70% simple mono-nucleotide and di-nucleotide repeat bases; 0=don\'t count.\n+\n+    -n or -number num_reads      : For outputting specified number of randomly selected reads or contigs.\n+\n+    -c or -cutoff cutoff_length  : Give a number of reads to do extra analysis (calculating again the number of reads and number of bases in reads above this length)\n+\n+    -l or -longest 0 or 1        : 1=Output the longest read;  0= don\'t output the longest read\n+\n+    -d or -doCommify 0 or 1      : Output numbers formatted with commas to make easier to read: 0=no commas, default=1\n+\n+    -f or -format w or t         : w=wiki_format (ie. table with || and | for column dividers), t=tabs between column symbols for the wiki pages, default is space'..b'un=R_2009_04_23_17_54_06_\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+\n+>FUOMOGO01AQV42 length=339 xy=0189_0676 region=1 run=R_2009_04_23_17_54_06_\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGT\n+>FUOMOGO01AUK0D length=214 xy=0231_0843 region=1 run=R_2009_04_23_17_54_06_\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACGACGACGACGAC\n+>FUOMOGO01AUB7C length=64 xy=0228_1718 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGTACG\n+TACG\n+>FUOMOGO01AU00B length=213 xy=0236_1097 region=1 
run=R_2009_04_23_17_54_06_\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACGACGACGACGACG\n+>FUOMOGO01ATYRT length=169 xy=0224_0695 region=1 run=R_2009_04_23_17_54_06_\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGT\n+>FUOMOGO01ARMLN length=400 xy=0197_2201 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATAGTAGTAGTAGTATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AVGRX length=44 xy=0241_1051 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01ASZ6K length=315 xy=0213_0922 region=1 run=R_2009_04_23_17_54_06_\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGT\n+>FUOMOGO01ARSZF length=65 xy=0199_2281 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGTAC\n+GTACG\n+>FUOMOGO01AYV8U length=49 xy=0280_1324 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AYV9X length=40 xy=0280_1363 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AUX4M length=40 xy=0235_1460 region=1 
run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AWOTU length=54 xy=0255_0800 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATATAGTA\n+>FUOMOGO01A11TC length=66 xy=0316_1054 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGTA\n+CGTACG\n+>FUOMOGO01ASRJP length=401 xy=0210_2019 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATAGTATAT\n+AGTAGTAGTAGTATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AU1ZH length=67 xy=0236_2363 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGT\n+ACGTACG\n+=cut\n'
b
diff -r 000000000000 -r ad2b274663f8 test-data/Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular Tue Nov 07 12:28:31 2017 -0500
b
@@ -0,0 +1,34 @@
+Statistics for contig lengths:
+ Min contig length: 56
+ Max contig length: 726,282
+ Mean contig length: 12811.87
+ Standard deviation of contig length: 61076.24
+ Median contig length: 331
+ N50 contig length: 203,546
+
+Statistics for numbers of contigs:
+ Number of contigs: 383
+ Number of contigs >=1kb: 109
+ Number of contigs in N50: 6
+
+Statistics for bases in the contigs:
+ Number of bases in all contigs: 4,906,947
+ Number of bases in contigs >=1kb: 4,843,765
+ GC Content of contigs: 52.18 %
+
+Simple Dinucleotide repeats:
+ Number of contigs with over 70% dinucleotode repeats: 0.00 % (0 contigs)
+ AT: 0.00 % (0 contigs)
+ CG: 0.00 % (0 contigs)
+ AC: 0.00 % (0 contigs)
+ TG: 0.00 % (0 contigs)
+ AG: 0.00 % (0 contigs)
+ TC: 0.00 % (0 contigs)
+
+Simple mononucleotide repeats:
+ Number of contigs with over 50% mononucleotode repeats: 0.00 % (0 contigs)
+ AA: 0.00 % (0 contigs)
+ TT: 0.00 % (0 contigs)
+ CC: 0.00 % (0 contigs)
+ GG: 0.00 % (0 contigs)
+
b
diff -r 000000000000 -r ad2b274663f8 test-data/SRR1002850_SMALL.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR1002850_SMALL.fasta Tue Nov 07 12:28:31 2017 -0500
b
b'@@ -0,0 +1,766 @@\n+>NODE_384_length_56_cov_34125_ID_767\n+CTAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTACTTTCTGACCTCGG\n+>NODE_383_length_56_cov_335_ID_765\n+CAATTGTGATAGCGTCGCGTGTGACGGTATTACAATTACACATCCTGCCCCGAAAA\n+>NODE_382_length_56_cov_201_ID_763\n+CCGTCGCTCACCGACGAAAGATAAAACGGAATCAGGCTAGCGGGGTGTCGGGGTGC\n+>NODE_377_length_57_cov_34097.5_ID_753\n+AAACCGAGGTCAGAAAGTACCGTCGCTCACCGACGAAAGATAAAACTGAATCAGGCT\n+>NODE_376_length_57_cov_200_ID_751\n+GAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTAGTGCCGCCAGGCAAATTCTTT\n+>NODE_375_length_58_cov_19568.3_ID_749\n+TAATACCGTCACACGCGACGCTATCACAATTGCCATCTGGTCAGGGGTGCACCCCGAC\n+>NODE_374_length_58_cov_29262.3_ID_747\n+GCACCCCGACACCCCGCTAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTA\n+>NODE_372_length_59_cov_291.25_ID_743\n+TAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGGACTTTCTGACCTCGGTTTT\n+>NODE_369_length_60_cov_34761.2_ID_737\n+GCGGTGGCCGAAACCCTGAAATATTCCACGAAGCCGGCTGACATGGTGGCCGATCCTGAA\n+>NODE_367_length_60_cov_167.2_ID_733\n+CCGAGGGCGACGATGACGGTTCACGAATCGCGTTCGAGTGGAAACCCGAGGTCAGAAAGT\n+>NODE_365_length_61_cov_228.5_ID_729\n+TCGCGTTCGAGTGGAAAACCGAGGTCAGAAAGTCCCGTCGCTCACCGACGAAAGATAAAAC\n+>NODE_360_length_62_cov_66.7143_ID_719\n+CAGGGGTGCACCCCGACACCCCGCTAGCCTGATTCAGTTTTATCTTTCGTCGGTGGGCGACG\n+>NODE_357_length_63_cov_202.375_ID_713\n+GGGGTGTGGGGGTGCACCCCTGACCAGATGGCAATTGTGATAGCGTCGCGTGTGACGGTATTA\n+>NODE_356_length_63_cov_142.75_ID_711\n+TGTAATTGTAATAACGTCACACGCGACGCTATCACAATTGCCATCTGGTCAGGGGGGCACCCC\n+>NODE_355_length_64_cov_127.667_ID_709\n+CGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAGCGGGGTGTCGGGGTGCCCCCCTGAC\n+>NODE_352_length_64_cov_30.6667_ID_703\n+CGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCACCCCTGAC\n+>NODE_351_length_64_cov_104.444_ID_701\n+ATGATATTTCCGAGGGCGACGATGACGGTTCACGAATCGCGTTCGAGTGGAAACCCGAGGTCAG\n+>NODE_349_length_64_cov_29.3333_ID_697\n+GTCAGGGGGGCACCCCGACACCCCCCTAGCCTGATTCAGTTTTATCTTTCGTCGGGGAGCGACG\n+>NODE_347_length_64_cov_32_ID_693\n+GTCAGGGGGGCACCCCGACACCCCGCTAGCCTGATTCAGTTTTTTCTTTCGTCGGGGAGCGACG\n+>NODE_346_length_65_c
ov_38729.2_ID_691\n+CTGACCTCGGTTTTCCACTCGAACGCGATTCGTGAACCGTCATCGTCGCCCTCGGAAATATCATC\n+>NODE_342_length_66_cov_28.1818_ID_683\n+CTGACCTCGGGTTTCCACTCGAACGCGATTCGTGAACCGTCATCGTCGCCCTCGGGAATATCATCA\n+>NODE_340_length_67_cov_71.3333_ID_679\n+CCGACGAAAGATAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCCCCCCTGACCAGATGGCAATT\n+>NODE_336_length_67_cov_56_ID_671\n+CTTGAATATGGCGGTGAGGGGGGGATTGACTCGCTTTGCTCGCCCCTTCGGGGCAGCCCGTTCGCTG\n+>NODE_335_length_68_cov_32300.2_ID_669\n+CCCCGACACCCCGCTAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTACTTTCTGACCTCG\n+>NODE_333_length_69_cov_126.429_ID_665\n+GGTGTCGGGGTGCCCCCCTGACCAGAGGGCAATTGTGATAGCGTCGCGTGTGACGGTATTACAATTACA\n+>NODE_332_length_69_cov_101.286_ID_663\n+GGTGTCGGGGTGCCCCCCTGACCAGATGGCATTTGTGATAGCGTCGCGTGTGACGGTATTACAATTACA\n+>NODE_327_length_71_cov_31.375_ID_653\n+GCACCCCGACACCCCCCTAGCCTGATTCAGTTTTATCTTTCGTCGGGGAGCGACGGTACTTTCTGACCTCG\n+>NODE_326_length_71_cov_108.125_ID_651\n+CCGAGGTCAGAAAGTCCCGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAGCGGGGTGTCGGGGTG\n+>NODE_324_length_71_cov_35.75_ID_647\n+TGTAATTGTAATACCCTCACACGCGACGCTATCACAATTGCCATCTGGTCAGGGGGGCACCCCGACACCCC\n+>NODE_318_length_74_cov_355.895_ID_635\n+GTAGGCGTTATTGGCGCAGCCAGTTTGGACACGGACAGCGCGCAAAAACCGGAGCGTACACGTAGTACGTGAGG\n+>NODE_316_length_76_cov_59.5714_ID_631\n+CGTCGCTCACCGACGAAAGAAAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCACCCCTGACCAGATGGCAATT\n+>NODE_315_length_76_cov_76.8571_ID_629\n+CGTCGCTCACCGACGAAAGATAAAACTGAATCAGGCTAGCGGGGTGTGGGGGTGCACCCCTGACCAGATGGCAATT\n+>NODE_314_length_76_cov_172.333_ID_627\n+AATTGCCATCTGGTCAGGGGTGCACCCCGACACCCCGCTAGCCTGATTCCGTTTTATCTTTCGTCGGTGAGCGACG\n+>NODE_313_length_76_cov_35231.5_ID_625\n+AAACGGCAAAATGATTTACTCGCCGTTTGTGTGGGTTGTTTTGTGGGGTGTCAATGGGGTTGTGGTCTTTTTTGTG\n+>NODE_312_length_76_cov_28.9048_ID_623\n+CGTCGCTCACCGACGAAAGAAAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCCCCCCTGACCAGATGGCAATT\n+>NODE_310_length_79_cov_105.5_ID_619\n+CTCAAAAAACCGAGAAACGGCGGGGATTTTCGGGACAGGATGTGTAATTGTAATAACGTCACACGCGACGCTATCACAA\n+>NODE_306_length_82_cov_64.2963_ID_611\n+TATCACAATTGCCATCTGGTCAGGG
GGGCACCCCGACACCCCGCTAGCCTGATTCCGTTTTATCTTTCGTCGGTGAGCGACG\n+>NODE_294_length'..b'GGATGGCCGCGTCTTTGAAGCGCTACTGGTTGGCTCCGATTCGCTTACCGATCTGGCGGTGCTGAAGATCAACGCCACTGGCGGGCTGCCTACCATCCCGATTAATACAAAGCGTACACCGCATATTGGCGACGTCGTACTGGCTATCGGCAACCCATATAATCTGGGACAGACCATTACCCAGGGGATCATCAGCGCAACGGGTCGTATCGGCCTGAACCCGACGGGGCGACAGAATTTTCTCCAGACCGACGCCTCGATTAACCACGGTAATTCCGGCGGCGCGCTGGTCAACTCGTTAGGCGAACTGATGGGGATCAACACCCTCTCTTTTGATAAGAGTAACGATGGTGAAACGCCGGAAGGCCTTGGTTTTGCGATTCCCTTCCAGCTAGCCACGAAAATTATGGATAAGCTTATCCGCGACGGTCGTGTGATTCGCGGCTATATCGGTATTGGCGGACGAGAAATCGCGCCGCTGCACGCGCAGCAGGGTAGCGGCATGGACCCGATTCAGGGCATTGTCGTTAATGAAGTGACGCCAAACGGCCCCGCCGCGCTTGCCGGTATTCAGGTTAATGATTTGATTATTTCGGTCAATAATAAACCCGCCGTGTCCGCGCTGGAGACAATGGATCAGGTGGCGGAAATCCGCCCGGGCTCCGTCATTCCGGTCGTGGTAATGCGGGATGATAAGCAACTCACGTTCCAGGTGACGGTGCAGGAATACCCGGCGTCGAACTAAAACGACGCGGCTTTTGCCTGCGTCGTTCGACGGCTAGTCAATAAAGACTGGCCCTGCCGGAAAATGCGCCAGGAAACTCGTGAAGATCAGCAGAGCGGGGAGACTTTTCTCCCCGACGGCTTCTTCACGCAGCGCCTTATCCAGTGCAAGCAGGCTGTGCTCCCCCCCTTCCCCCAAAATGCCGCCCATCCAGAGCAGTCGTTGCGCCTGTTGACGCGCATACTGCGATGCGCTTTTCCAGGCGACCAGATGCAGCAACGTTTGTAACAGCAGGACCCGAGACGCGTGACCATCGCGTTGCAATCGCCTGAATACGGGTAATGCAATACTACGTACCGTCGAGAAACCGCTCGCCGCCTCCCCCTCAATCGGCGATAAACCAACAGCCGCCTGTTTGACGAAATCGCATAACTGATTGTGTGTTAATGGCCGCTCGCCTGCGCTCATATACCCTGCGGCAAAACAGAGTAACCCGATACCCTGAGCCCAATCGGTCGATGGCAGAATCCCCTGCGGAACATTCGGTGCCGACGTCTGTGCGGCACAAAGCCCCTCGTCGTAACACTGTGCAAAACGTTGCTCATATATTGCTGAGTTGATGCCAGACCACGGCAGTAAATGCAGCGCTGGCTCAAGCTCAGTTATCTGATTCACCGTCTCAACAACATAACGCAAGATGGTTTCTCTGGATGGCATGGATGTACCTTGCCGAAACGCAACCATTTCACTCATTACTGTGTCCTCATTTTGCGGGAGCAGAATCACTGCTCCCGATTAGATTGGATTACATTGCCAGCACGTACTTCAGCATCACGCCCGCCGCAATGGCCGAACCGATAACCCCCGCCACGTTCGGGCCCATCGCGTGCATCAGCAGGAAGTTCTGCGGGTCCGACTCCAGGCCCACCTTGTTCGATACGCGGGCCGCCATCGGCACCGCCGACACCCCCGCCGAACCGATAAGCGGGTTGATTTTGTTCTTACTGCACAGGTTCAGCAGCTTCGCCATCAGCACCCCGGCGGCAGTCCCGATACCAAAGGCAATCACACCCAGCAGCAGAATGCCCAGCGTCTGCGGCTGCAGGAACTTGTCCGCCACCAGCTTCGCCCCTACCGACAGCCCGAGGAAAATGGTGACGATGTTGATCAGCCCGTTCTGCACTGTGTCGCTCAG
GCGCTCCACCACGCCGCTTTCACGCATCAGATTGCCGAAGCAGAACATCCCCAGCAGCGGCGCGGCGTCCGGCAGCAGCAGCGCCACCAGTAGTAACAGCACTACCGGGAAGAGGATTTTTTCCCGCTTACTCACCGTGCGCAGCTGCACCATGCGGATTTTCCGCTCCGTCTCCGTGGTCAGCGCGCGCATAATCGGCGGCTGGATTAACGGCACCAGCGCCATATACGAGTACGCCGCCACCGCGATGGCCCCCAGCAGCTCCGGCGCCAGTTTGCCCGACAGATAAATGGCCGTCGGACCGTCCGCGCCGCCGATGATGCCTATCGCCGCCGCCTGCGGCAGGGTGAAGGAAATCAGGCCGAAGTAATTCAGCGTCAGCGCCCCCAGCACCGTGGCGAAGATGCCGAACTGCGCCGCCGCGCCGAGCAGCAGGGTACGCGGGTTGGCCAGCAGCGGGCCGAAGTCGGTCATCGCGCCGACGCCCATAAAGATGACCAGCGGCGCGACGCCGGAGCCAATCGCCACTTTATAGAACAGCGCCAGCACCCCCGGCGTGTAGCCCATGTCCACCGCCAGGTTCTCCATCTGGCTCTGAACGGAGGGCAGCGCCAGCGCTAACGCCTCCTTGATGGCGTGCACGTCCGGCGCGCAGTTAAGCTTCGCGGCAATCACCGCCAGCTGCCCGGCGTCGTGGTGCGCCAGCAGGCTTTCCAGCGCGGTCAGCGCCATGCCCGCTTCCGGGATGTTGGAGAGCAGGCCGCCGAAGCCAATCGGCAACAGCAACAACGGCTCGAACTTTTTCGCAATCGCCAGCCACAGCAGCAGCAGGCTGACCAGCAGCATGATGGCCTGGCCTGCGCCAAGGTGCATCAGCCCCATGCCCTGAAGCAGGGCGTTCAGACTTTCCATTCGTCCTCTCCGTTACGCAAGCTGCATCAGGGTGTCGCCCACCGCCACCGCATCCCCGGCTTTCACCGCGATGCCGCGTACCGTCCCGGCCTGCGCGGCGCGGATTTCGGTTTCCATTTTCATGGCTTCCAGAATCAGCAGCACATCGCCTTCGGCCACCGTCTGGCCTTCCGTCGCCACCACTTTCCAGATATTGCCCGCCAGCGGCGCGCTGACCGGGGTGCCCGCGCCCGCCGGGGCGGCGGCAGGCGCCGCGGTCTGAACCGGCGCAGAACTGGCCGTGGCCAGCTGGCTGATATCGCCGCCGTCGCTGACTTTCACCACAAAGGCTTTGCCTTCCACTTCGACGGTGTAGATACCGGAAGCGGCGGGTTTTGCGGCTTTTTCCTCTTTCACGGCAGGTTTCGCGGCTTCCGCCTGCGGCAGTGGCTCAAACGCCGCCGGGTTATGGCGGTTTTCGAGGAATTTCAGCCCGACCTGCGGGAACAGTGCCACGGTGAGGACGTCATCAATGGCGTTCTCTGCAAGCCGGATGCCTTTTTCTGCCGCCTGGCGTTTAATGTCCGCTTCCAGTTGCGCCAGTTCCGGTTTCAGTAAATCCGCCGGGCGGCAGGTGACGGCCTCTTCGCCGTCCAGCACTCTGGCCTGTAACGCCGCGTTGACCGGCGCTGGCGTGTGGCCGTATTCGCCCTTCAGTATCCCGGCGGTTTCTTTGGCGATGGTTT\n+>NODE_216_length_189_cov_23403.1_ID_431\n+CCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTACTTTCTGACCTCGGTTTTTCACTCGAACGCGATTCGTGAACCGTCATCGTCGCCCTCGGAAATATCATCACCGATCACCATGTCGGCGTCAGTTTCCTGATCGAGTTTCAGCACGTCTTTGAGTGCGCCGCCGGTCGCCACAAAGCGGCGC\n+>NODE_222_length_167_cov_18131.6_ID_443\n+CACTCAAAGACGTGCTGAAACTCGATCAGGAAACTGACGCCGACATGGTGATCGGTGATGATATTTCCGAGGGCGACGATGACG
GTTCACGAATCGCGTTCGAGTGAAAAACCGAGGTCAGAAAGTACCGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAG\n'