Next changeset 1:7556309ffbaf (2020-05-29) |
Commit message:
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 3f9ae719338c7c8db81d645b8ee09727e2d9ce23 |
added:
README_ASSEMBLY_STATS assembly_stats_txt.py assembly_stats_txt.xml fasta_summary.pl test-data/Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular test-data/SRR1002850_SMALL.fasta |
b |
diff -r 000000000000 -r ad2b274663f8 README_ASSEMBLY_STATS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README_ASSEMBLY_STATS Tue Nov 07 12:28:31 2017 -0500 |
b |
@@ -0,0 +1,16 @@ +#Created 07/01/2011 +#Konrad Paszkiewicz, University of Exeter + +#Modified by Mariam Iskander and Matthew Gopez, October 13th, 2017 + +Assembly stats + +This series of scripts calculates various metrics on an input FASTA file. It is typically most useful on de novo genomic or transcriptomic assemblies. + +Prerequisites: + +1. The bundled Perl script fasta_summary.pl + +Limitations: + +Ideally this should output a composite dataset of some sort. |
b |
diff -r 000000000000 -r ad2b274663f8 assembly_stats_txt.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/assembly_stats_txt.py Tue Nov 07 12:28:31 2017 -0500 |
[ |
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Version 1.01 - bugs kindly corrected by Jan van Haarst
# Modified by Matthew Gopez October 13th, 2017

"""Galaxy wrapper around the bundled fasta_summary.pl script.

Runs fasta_summary.pl on a FASTA file, then copies the files it is expected
to leave in the working directory to the output paths given on the command
line.
"""

import logging
import os
import shlex
import shutil
import subprocess
import sys


log = logging.getLogger(__name__)

# The 'except ... as ...' syntax used below needs Python 2.6 or newer.
assert sys.version_info[:2] >= (2, 6)


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status.

    The exit status must be non-zero: the tool definition detects failures
    through the exit code, so a plain ``sys.exit()`` (status 0) would make a
    failed run look successful.
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)


def __main__():
    """Run fasta_summary.pl and copy its results to the requested outputs.

    Positional command-line arguments (``sys.argv``):

    1.  ignored (the tool definition passes the read type here as well)
    2.  working directory handed to fasta_summary.pl via ``-o``
    3.  read type handed to fasta_summary.pl via ``-t``
    4.  extra option(s) for fasta_summary.pl, may be an empty string
    5.  input FASTA file
    6.  destination for ``stats.txt``
    7.  destination for ``sorted_contigs.fa``
    8.  destination for ``histogram_bins.dat.png``
    9.  destination for ``summed_contig_lengths.dat.png``
    10. destination for ``histogram_bins.dat``
    11. destination for ``summed_contig_lengths.dat``

    Exits through ``stop_err`` (status 1) when the arguments are missing,
    the working directory cannot be created, or fasta_summary.pl fails.
    """
    # Parse Command Line
    if len(sys.argv) < 12:
        stop_err('Error running assembly_stats_txt.py: expected 11 '
                 'arguments, got %d' % (len(sys.argv) - 1))

    working_dir = sys.argv[2]
    read_type = sys.argv[3]
    bucket = sys.argv[4]
    fasta_path = sys.argv[5]
    destinations = sys.argv[6:12]

    try:  # for test - needs this done
        os.makedirs(working_dir)
    except OSError as e:
        # An already existing directory is fine; anything else is fatal.
        if not os.path.isdir(working_dir):
            stop_err('Error running assembly_stats_txt.py ' + str(e))

    # Resolve the helper next to this script even when argv[0] carries no
    # directory component.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    cmd = [os.path.join(script_dir, 'fasta_summary.pl'),
           '-i', fasta_path, '-t', read_type]
    # 'bucket' is either empty or holds option text such as '-b'; splitting
    # it drops the empty case instead of passing an empty argument.
    cmd.extend(shlex.split(bucket))
    cmd.extend(['-o', working_dir])

    try:
        with open(os.devnull, 'w') as devnull:
            # An argument list (no shell) keeps paths with spaces or shell
            # metacharacters intact.
            proc = subprocess.Popen(cmd, stdout=devnull,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            # communicate() drains stderr while waiting, so a chatty child
            # cannot block on a full pipe.
            stderr = proc.communicate()[1]
    except OSError as e:
        stop_err('Error running assembly_stats_txt.py ' + str(e))

    if proc.returncode != 0:
        stop_err('Error running assembly_stats_txt.py: fasta_summary.pl '
                 'exited with status %d\n%s' % (proc.returncode, stderr))

    # Files read back from the working directory, in the same order as the
    # destination arguments.
    produced = (
        'stats.txt',
        'sorted_contigs.fa',
        'histogram_bins.dat.png',
        'summed_contig_lengths.dat.png',
        'histogram_bins.dat',
        'summed_contig_lengths.dat',
    )
    for name, destination in zip(produced, destinations):
        # Byte-for-byte copy: two of the files are PNG images and must not
        # go through text-mode line handling.
        shutil.copyfile(os.path.join(working_dir, name), destination)


if __name__ == '__main__':
    __main__()
b |
diff -r 000000000000 -r ad2b274663f8 assembly_stats_txt.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/assembly_stats_txt.xml Tue Nov 07 12:28:31 2017 -0500 |
[ |
<tool id="assemblystats" name="assemblystats" version="1.0.1">
    <description>Summarise an assembly (e.g. N50 metrics)</description>
    <requirements>
        <requirement type="package" version="1.6.924">perl-bioperl</requirement>
        <requirement type="package" version="5.0.4">gnuplot</requirement>
    </requirements>
    <!--
        Argument order expected by assembly_stats_txt.py:
        1 read type (ignored by the script), 2 working directory,
        3 read type, 4 histogram bucket option, 5 input FASTA,
        6 to 11 the six output datasets in the order listed below.
    -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/assembly_stats_txt.py'

        '$type'
        '$stats.extra_files_path'
        '$type'
        '$bucket'
        '$input'
        '$stats'
        '$sortedcontigs'
        '$histogrampng'
        '$summedcontigspng'
        '$histogramdata'
        '$summedcontigdata'
    ]]></command>
    <inputs>
        <param label="Type of read" name="type" type="select" help="Is this from an genomic (contig) or transcriptomic assembly (isotig) or are these raw reads (read)">
            <option value="contig" selected="yes">Contig (if from genomic assembly)</option>
            <option value="isotig">Isotig (if from transcriptomic assembly)</option>
            <option value="read">Raw reads from sequencer in FASTA format (useful for 454 data)</option>
        </param>
        <param name="bucket" type="boolean" label="Output histogram with bin sizes=1" truevalue="-b" falsevalue="" help="Use this to specify whether or not bin sizes of 1 should be used when plotting histograms"/>
        <param format="fasta" name="input" type="data" label="Source file in FASTA format"/>
        <param name="all_outputs" type="boolean" checked="false" label="Return all output files" help="If checked, all output files will be displayed. If not checked, only the file 'Assembly Statistics' will be provided."/>
    </inputs>
    <outputs>
        <data format="tabular" name="stats" label="Assembly statistics - $input.display_name"/>
        <!-- Every output except the statistics table is only kept when "Return all output files" is checked. -->
        <data format="fasta" name="sortedcontigs" label="Sorted contigs - $input.display_name">
            <filter>all_outputs is True</filter>
        </data>
        <data format="png" name="histogrampng" label="Histogram of contig sizes - $input.display_name">
            <filter>all_outputs is True</filter>
        </data>
        <data format="png" name="summedcontigspng" label="Cumulative sum of contig sizes - $input.display_name">
            <filter>all_outputs is True</filter>
        </data>
        <data format="tabular" name="histogramdata" label="Histogram data - $input.display_name">
            <filter>all_outputs is True</filter>
        </data>
        <data format="tabular" name="summedcontigdata" label="Cumulative sum of contig size data - $input.display_name">
            <filter>all_outputs is True</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="input" value="SRR1002850_SMALL.fasta"/>
            <output name="stats" value="Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular"/>
        </test>
    </tests>
    <help><![CDATA[
**Summarise assembly overview**

This script is used to give summary statistics of an assembly or set of reads. Typically this is run after an assembly to evaluate gross features.


# Gives back
# - N50
# - num of contigs > 1 kb
# - num of contigs
# - Read or Contig Histogram and graphs.
# - Summed contig length (by number of contigs, in sorted order)
    ]]>

    </help>
    <citations>
        <!-- BibTeX separates authors with "and"; the entry is closed by exactly one brace. -->
        <citation type="bibtex">@ARTICLE{a1,
            title = {Summarise an assembly (e.g. N50 metrics)},
            author = {Konrad Paszkiewicz and Sujai Kumar and Mariam Iskander},
            url = {https://github.com/phac-nml/galaxy_tools/}
        }</citation>
    </citations>
</tool>
b |
diff -r 000000000000 -r ad2b274663f8 fasta_summary.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fasta_summary.pl Tue Nov 07 12:28:31 2017 -0500 |
[ |
b'@@ -0,0 +1,776 @@\n+#!/usr/bin/env perl\n+\n+#==============================================================================================\n+\n+# Script to output statistsics and histograms for reads and contigs/isotigs\n+\n+\n+# Outputs include:\n+# Mean, N50, StdDev or reads or contig lengths,\n+# Mean and Modal read or contig lengths.\n+# Number of reads or contigs > 1 kb in length \n+# Summed contig length (by number of contigs, in sorted order)\n+# Histogram of read or contig lengths,\n+# Graph of sums of read-lengths\n+# File of reads or contigs sorted by read or contig length\n+# Test for mono/di-nucelotide repeats\n+# Randomly selected reads or contigs\n+\n+\n+# Needs gnuplot installed to create the histograms:\n+# On Fedora/Redhat linux: sudo yum install gnuplot \n+# On Ubuntu/Debian: sudo apt-get install gnuplot\n+\n+# Uses a linux pipe to call gnu-plot directly, rather than as a separate shell script.\n+\n+# Original written by Sujai Kumar, 2008-09-05 University of Edinburgh\n+# Modified by Stephen: 29-Apr-2009:\n+# Last changed by Stephen: 9-Aug-2010\n+\n+\n+# Usage: fasta_summary.pl -i infile.fasta -o process_reads -t read OR contig OR isotig (to use \'read\' or \'contig\' or \'isotig\' in the output table & graphs. 
Isotig is for \'runAssembly -cdna ...\' output file \'454Isotigs.fna\') [-r 1 to indicate count simple nucleotide repeats] [-n number of random reads to output] [-c cutoff_length] [-l 1 to indicate output the longest read] [-f (s or t or w) for spacer, tab or wiki format table output.]\n+\n+# Note: The parameters above in the [] are optional.\n+\n+# eg: fasta_summary.pl -i myfile.fasta -o process_reads -t read\n+# Where:\n+# -i reads or contigs as input, in fasta format.\n+# -o output_dir (created if it doesn\'t exist)\n+# -t read, contig or isotig\n+\n+# Gives back\n+# - N50\n+# - num of contigs > 1 kb\n+# - num of contigs\n+# - Read or Contig Histogram and graphs.\n+# - Summed contig length (by number of contigs, in sorted order)\n+\n+#==============================================================================================\n+\n+\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+\n+my $infile;\n+my $output_dir;\n+my $type=\'read\'; # Defaults to \'read\' at present\n+my $repeats=1;\n+my $num_random_reads_to_output=0;\n+my $cutoff_length=-1; # -1 means won\'t check this cutoff\n+my $longest_read=-1; # -1 mean\'s don\'t output the sequence for the longest read.\n+my $doCommify=1; # Outputs statistics numbers in format: 9,999,999\n+my $format="t"; # "s"=spaces between columns, "t"=tabs between columns, "w"=wiki \'||\' and \'|\'.\n+my $bucket1=0; # For optional exact length histogram distribution as asked for by JH.\n+\n+if ($#ARGV==-1) {die "\n+ Usage: \n+\n+ fasta_summary.pl -i infile.fasta -o output_dir -t ( read | contig | isotig ) [ -r 0 ] [ -n num_reads ] [ -c cutoff_length ] [ -l 1 ] [ -d 0 ] [ -f (w | t ) ] [ -bucket1 ]\n+\n+ where:\n+\n+ -i or -infile infile.fasta : input fatsa file of raeds, contigs or isotigs, \n+\n+ -o or -output_dir output_directory : directory to put output stats and graphs into.\n+\n+ -t or -type (read or contig or isotig) : for displaying the graph title, where type is \'read\' or \'contig\' or \'isotig\'.\n+\n+ -r or -repeats 
0 or 1 : 1=count number of reads that contain over 70% simple mono-nucleotide and di-nucleotide repeat bases; 0=don\'t count.\n+\n+ -n or -number num_reads : For outputting specified number of randomly selected reads or contigs.\n+\n+ -c or -cutoff cutoff_length : Give a number of reads to do extra analysis (calculating again the number of reads and number of bases in reads above this length)\n+\n+ -l or -longest 0 or 1 : 1=Output the longest read; 0= don\'t output the longest read\n+\n+ -d or -doCommify 0 or 1 : Output numbers formatted with commas to make easier to read: 0=no commas, default=1\n+\n+ -f or -format w or t : w=wiki_format (ie. table with || and | for column dividers), t=tabs between column symbols for the wiki pages, default is space'..b'un=R_2009_04_23_17_54_06_\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+\n+>FUOMOGO01AQV42 length=339 xy=0189_0676 region=1 run=R_2009_04_23_17_54_06_\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGT\n+>FUOMOGO01AUK0D length=214 xy=0231_0843 region=1 run=R_2009_04_23_17_54_06_\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACGACGACGACGAC\n+>FUOMOGO01AUB7C length=64 xy=0228_1718 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGTACG\n+TACG\n+>FUOMOGO01AU00B length=213 xy=0236_1097 region=1 
run=R_2009_04_23_17_54_06_\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC\n+ACACACACACACACACACACGACGACGACGACG\n+>FUOMOGO01ATYRT length=169 xy=0224_0695 region=1 run=R_2009_04_23_17_54_06_\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGT\n+>FUOMOGO01ARMLN length=400 xy=0197_2201 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATAGTAGTAGTAGTATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATATATATA\n+TATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AVGRX length=44 xy=0241_1051 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01ASZ6K length=315 xy=0213_0922 region=1 run=R_2009_04_23_17_54_06_\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGT\n+>FUOMOGO01ARSZF length=65 xy=0199_2281 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGTAC\n+GTACG\n+>FUOMOGO01AYV8U length=49 xy=0280_1324 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AYV9X length=40 xy=0280_1363 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AUX4M length=40 xy=0235_1460 region=1 
run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AWOTU length=54 xy=0255_0800 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATATAGTA\n+>FUOMOGO01A11TC length=66 xy=0316_1054 region=1 run=R_2009_04_23_17_54_06_\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGTA\n+CGTACG\n+>FUOMOGO01ASRJP length=401 xy=0210_2019 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATAGTATAT\n+AGTAGTAGTAGTATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAT\n+ATATATATATATATATATATATATATATATATATATATATA\n+>FUOMOGO01AU1ZH length=67 xy=0236_2363 region=1 run=R_2009_04_23_17_54_06_\n+TATATATATATATATATATATATATATATATATATATATATATATATATATATAGTACGT\n+ACGTACG\n+=cut\n' |
b |
diff -r 000000000000 -r ad2b274663f8 test-data/Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular Tue Nov 07 12:28:31 2017 -0500 |
b |
@@ -0,0 +1,34 @@ +Statistics for contig lengths: + Min contig length: 56 + Max contig length: 726,282 + Mean contig length: 12811.87 + Standard deviation of contig length: 61076.24 + Median contig length: 331 + N50 contig length: 203,546 + +Statistics for numbers of contigs: + Number of contigs: 383 + Number of contigs >=1kb: 109 + Number of contigs in N50: 6 + +Statistics for bases in the contigs: + Number of bases in all contigs: 4,906,947 + Number of bases in contigs >=1kb: 4,843,765 + GC Content of contigs: 52.18 % + +Simple Dinucleotide repeats: + Number of contigs with over 70% dinucleotode repeats: 0.00 % (0 contigs) + AT: 0.00 % (0 contigs) + CG: 0.00 % (0 contigs) + AC: 0.00 % (0 contigs) + TG: 0.00 % (0 contigs) + AG: 0.00 % (0 contigs) + TC: 0.00 % (0 contigs) + +Simple mononucleotide repeats: + Number of contigs with over 50% mononucleotode repeats: 0.00 % (0 contigs) + AA: 0.00 % (0 contigs) + TT: 0.00 % (0 contigs) + CC: 0.00 % (0 contigs) + GG: 0.00 % (0 contigs) + |
b |
diff -r 000000000000 -r ad2b274663f8 test-data/SRR1002850_SMALL.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SRR1002850_SMALL.fasta Tue Nov 07 12:28:31 2017 -0500 |
b |
b'@@ -0,0 +1,766 @@\n+>NODE_384_length_56_cov_34125_ID_767\n+CTAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTACTTTCTGACCTCGG\n+>NODE_383_length_56_cov_335_ID_765\n+CAATTGTGATAGCGTCGCGTGTGACGGTATTACAATTACACATCCTGCCCCGAAAA\n+>NODE_382_length_56_cov_201_ID_763\n+CCGTCGCTCACCGACGAAAGATAAAACGGAATCAGGCTAGCGGGGTGTCGGGGTGC\n+>NODE_377_length_57_cov_34097.5_ID_753\n+AAACCGAGGTCAGAAAGTACCGTCGCTCACCGACGAAAGATAAAACTGAATCAGGCT\n+>NODE_376_length_57_cov_200_ID_751\n+GAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTAGTGCCGCCAGGCAAATTCTTT\n+>NODE_375_length_58_cov_19568.3_ID_749\n+TAATACCGTCACACGCGACGCTATCACAATTGCCATCTGGTCAGGGGTGCACCCCGAC\n+>NODE_374_length_58_cov_29262.3_ID_747\n+GCACCCCGACACCCCGCTAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTA\n+>NODE_372_length_59_cov_291.25_ID_743\n+TAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGGACTTTCTGACCTCGGTTTT\n+>NODE_369_length_60_cov_34761.2_ID_737\n+GCGGTGGCCGAAACCCTGAAATATTCCACGAAGCCGGCTGACATGGTGGCCGATCCTGAA\n+>NODE_367_length_60_cov_167.2_ID_733\n+CCGAGGGCGACGATGACGGTTCACGAATCGCGTTCGAGTGGAAACCCGAGGTCAGAAAGT\n+>NODE_365_length_61_cov_228.5_ID_729\n+TCGCGTTCGAGTGGAAAACCGAGGTCAGAAAGTCCCGTCGCTCACCGACGAAAGATAAAAC\n+>NODE_360_length_62_cov_66.7143_ID_719\n+CAGGGGTGCACCCCGACACCCCGCTAGCCTGATTCAGTTTTATCTTTCGTCGGTGGGCGACG\n+>NODE_357_length_63_cov_202.375_ID_713\n+GGGGTGTGGGGGTGCACCCCTGACCAGATGGCAATTGTGATAGCGTCGCGTGTGACGGTATTA\n+>NODE_356_length_63_cov_142.75_ID_711\n+TGTAATTGTAATAACGTCACACGCGACGCTATCACAATTGCCATCTGGTCAGGGGGGCACCCC\n+>NODE_355_length_64_cov_127.667_ID_709\n+CGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAGCGGGGTGTCGGGGTGCCCCCCTGAC\n+>NODE_352_length_64_cov_30.6667_ID_703\n+CGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCACCCCTGAC\n+>NODE_351_length_64_cov_104.444_ID_701\n+ATGATATTTCCGAGGGCGACGATGACGGTTCACGAATCGCGTTCGAGTGGAAACCCGAGGTCAG\n+>NODE_349_length_64_cov_29.3333_ID_697\n+GTCAGGGGGGCACCCCGACACCCCCCTAGCCTGATTCAGTTTTATCTTTCGTCGGGGAGCGACG\n+>NODE_347_length_64_cov_32_ID_693\n+GTCAGGGGGGCACCCCGACACCCCGCTAGCCTGATTCAGTTTTTTCTTTCGTCGGGGAGCGACG\n+>NODE_346_length_65_c
ov_38729.2_ID_691\n+CTGACCTCGGTTTTCCACTCGAACGCGATTCGTGAACCGTCATCGTCGCCCTCGGAAATATCATC\n+>NODE_342_length_66_cov_28.1818_ID_683\n+CTGACCTCGGGTTTCCACTCGAACGCGATTCGTGAACCGTCATCGTCGCCCTCGGGAATATCATCA\n+>NODE_340_length_67_cov_71.3333_ID_679\n+CCGACGAAAGATAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCCCCCCTGACCAGATGGCAATT\n+>NODE_336_length_67_cov_56_ID_671\n+CTTGAATATGGCGGTGAGGGGGGGATTGACTCGCTTTGCTCGCCCCTTCGGGGCAGCCCGTTCGCTG\n+>NODE_335_length_68_cov_32300.2_ID_669\n+CCCCGACACCCCGCTAGCCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTACTTTCTGACCTCG\n+>NODE_333_length_69_cov_126.429_ID_665\n+GGTGTCGGGGTGCCCCCCTGACCAGAGGGCAATTGTGATAGCGTCGCGTGTGACGGTATTACAATTACA\n+>NODE_332_length_69_cov_101.286_ID_663\n+GGTGTCGGGGTGCCCCCCTGACCAGATGGCATTTGTGATAGCGTCGCGTGTGACGGTATTACAATTACA\n+>NODE_327_length_71_cov_31.375_ID_653\n+GCACCCCGACACCCCCCTAGCCTGATTCAGTTTTATCTTTCGTCGGGGAGCGACGGTACTTTCTGACCTCG\n+>NODE_326_length_71_cov_108.125_ID_651\n+CCGAGGTCAGAAAGTCCCGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAGCGGGGTGTCGGGGTG\n+>NODE_324_length_71_cov_35.75_ID_647\n+TGTAATTGTAATACCCTCACACGCGACGCTATCACAATTGCCATCTGGTCAGGGGGGCACCCCGACACCCC\n+>NODE_318_length_74_cov_355.895_ID_635\n+GTAGGCGTTATTGGCGCAGCCAGTTTGGACACGGACAGCGCGCAAAAACCGGAGCGTACACGTAGTACGTGAGG\n+>NODE_316_length_76_cov_59.5714_ID_631\n+CGTCGCTCACCGACGAAAGAAAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCACCCCTGACCAGATGGCAATT\n+>NODE_315_length_76_cov_76.8571_ID_629\n+CGTCGCTCACCGACGAAAGATAAAACTGAATCAGGCTAGCGGGGTGTGGGGGTGCACCCCTGACCAGATGGCAATT\n+>NODE_314_length_76_cov_172.333_ID_627\n+AATTGCCATCTGGTCAGGGGTGCACCCCGACACCCCGCTAGCCTGATTCCGTTTTATCTTTCGTCGGTGAGCGACG\n+>NODE_313_length_76_cov_35231.5_ID_625\n+AAACGGCAAAATGATTTACTCGCCGTTTGTGTGGGTTGTTTTGTGGGGTGTCAATGGGGTTGTGGTCTTTTTTGTG\n+>NODE_312_length_76_cov_28.9048_ID_623\n+CGTCGCTCACCGACGAAAGAAAAAACTGAATCAGGCTAGGGGGGTGTCGGGGTGCCCCCCTGACCAGATGGCAATT\n+>NODE_310_length_79_cov_105.5_ID_619\n+CTCAAAAAACCGAGAAACGGCGGGGATTTTCGGGACAGGATGTGTAATTGTAATAACGTCACACGCGACGCTATCACAA\n+>NODE_306_length_82_cov_64.2963_ID_611\n+TATCACAATTGCCATCTGGTCAGGG
GGGCACCCCGACACCCCGCTAGCCTGATTCCGTTTTATCTTTCGTCGGTGAGCGACG\n+>NODE_294_length'..b'GGATGGCCGCGTCTTTGAAGCGCTACTGGTTGGCTCCGATTCGCTTACCGATCTGGCGGTGCTGAAGATCAACGCCACTGGCGGGCTGCCTACCATCCCGATTAATACAAAGCGTACACCGCATATTGGCGACGTCGTACTGGCTATCGGCAACCCATATAATCTGGGACAGACCATTACCCAGGGGATCATCAGCGCAACGGGTCGTATCGGCCTGAACCCGACGGGGCGACAGAATTTTCTCCAGACCGACGCCTCGATTAACCACGGTAATTCCGGCGGCGCGCTGGTCAACTCGTTAGGCGAACTGATGGGGATCAACACCCTCTCTTTTGATAAGAGTAACGATGGTGAAACGCCGGAAGGCCTTGGTTTTGCGATTCCCTTCCAGCTAGCCACGAAAATTATGGATAAGCTTATCCGCGACGGTCGTGTGATTCGCGGCTATATCGGTATTGGCGGACGAGAAATCGCGCCGCTGCACGCGCAGCAGGGTAGCGGCATGGACCCGATTCAGGGCATTGTCGTTAATGAAGTGACGCCAAACGGCCCCGCCGCGCTTGCCGGTATTCAGGTTAATGATTTGATTATTTCGGTCAATAATAAACCCGCCGTGTCCGCGCTGGAGACAATGGATCAGGTGGCGGAAATCCGCCCGGGCTCCGTCATTCCGGTCGTGGTAATGCGGGATGATAAGCAACTCACGTTCCAGGTGACGGTGCAGGAATACCCGGCGTCGAACTAAAACGACGCGGCTTTTGCCTGCGTCGTTCGACGGCTAGTCAATAAAGACTGGCCCTGCCGGAAAATGCGCCAGGAAACTCGTGAAGATCAGCAGAGCGGGGAGACTTTTCTCCCCGACGGCTTCTTCACGCAGCGCCTTATCCAGTGCAAGCAGGCTGTGCTCCCCCCCTTCCCCCAAAATGCCGCCCATCCAGAGCAGTCGTTGCGCCTGTTGACGCGCATACTGCGATGCGCTTTTCCAGGCGACCAGATGCAGCAACGTTTGTAACAGCAGGACCCGAGACGCGTGACCATCGCGTTGCAATCGCCTGAATACGGGTAATGCAATACTACGTACCGTCGAGAAACCGCTCGCCGCCTCCCCCTCAATCGGCGATAAACCAACAGCCGCCTGTTTGACGAAATCGCATAACTGATTGTGTGTTAATGGCCGCTCGCCTGCGCTCATATACCCTGCGGCAAAACAGAGTAACCCGATACCCTGAGCCCAATCGGTCGATGGCAGAATCCCCTGCGGAACATTCGGTGCCGACGTCTGTGCGGCACAAAGCCCCTCGTCGTAACACTGTGCAAAACGTTGCTCATATATTGCTGAGTTGATGCCAGACCACGGCAGTAAATGCAGCGCTGGCTCAAGCTCAGTTATCTGATTCACCGTCTCAACAACATAACGCAAGATGGTTTCTCTGGATGGCATGGATGTACCTTGCCGAAACGCAACCATTTCACTCATTACTGTGTCCTCATTTTGCGGGAGCAGAATCACTGCTCCCGATTAGATTGGATTACATTGCCAGCACGTACTTCAGCATCACGCCCGCCGCAATGGCCGAACCGATAACCCCCGCCACGTTCGGGCCCATCGCGTGCATCAGCAGGAAGTTCTGCGGGTCCGACTCCAGGCCCACCTTGTTCGATACGCGGGCCGCCATCGGCACCGCCGACACCCCCGCCGAACCGATAAGCGGGTTGATTTTGTTCTTACTGCACAGGTTCAGCAGCTTCGCCATCAGCACCCCGGCGGCAGTCCCGATACCAAAGGCAATCACACCCAGCAGCAGAATGCCCAGCGTCTGCGGCTGCAGGAACTTGTCCGCCACCAGCTTCGCCCCTACCGACAGCCCGAGGAAAATGGTGACGATGTTGATCAGCCCGTTCTGCACTGTGTCGCTCAG
GCGCTCCACCACGCCGCTTTCACGCATCAGATTGCCGAAGCAGAACATCCCCAGCAGCGGCGCGGCGTCCGGCAGCAGCAGCGCCACCAGTAGTAACAGCACTACCGGGAAGAGGATTTTTTCCCGCTTACTCACCGTGCGCAGCTGCACCATGCGGATTTTCCGCTCCGTCTCCGTGGTCAGCGCGCGCATAATCGGCGGCTGGATTAACGGCACCAGCGCCATATACGAGTACGCCGCCACCGCGATGGCCCCCAGCAGCTCCGGCGCCAGTTTGCCCGACAGATAAATGGCCGTCGGACCGTCCGCGCCGCCGATGATGCCTATCGCCGCCGCCTGCGGCAGGGTGAAGGAAATCAGGCCGAAGTAATTCAGCGTCAGCGCCCCCAGCACCGTGGCGAAGATGCCGAACTGCGCCGCCGCGCCGAGCAGCAGGGTACGCGGGTTGGCCAGCAGCGGGCCGAAGTCGGTCATCGCGCCGACGCCCATAAAGATGACCAGCGGCGCGACGCCGGAGCCAATCGCCACTTTATAGAACAGCGCCAGCACCCCCGGCGTGTAGCCCATGTCCACCGCCAGGTTCTCCATCTGGCTCTGAACGGAGGGCAGCGCCAGCGCTAACGCCTCCTTGATGGCGTGCACGTCCGGCGCGCAGTTAAGCTTCGCGGCAATCACCGCCAGCTGCCCGGCGTCGTGGTGCGCCAGCAGGCTTTCCAGCGCGGTCAGCGCCATGCCCGCTTCCGGGATGTTGGAGAGCAGGCCGCCGAAGCCAATCGGCAACAGCAACAACGGCTCGAACTTTTTCGCAATCGCCAGCCACAGCAGCAGCAGGCTGACCAGCAGCATGATGGCCTGGCCTGCGCCAAGGTGCATCAGCCCCATGCCCTGAAGCAGGGCGTTCAGACTTTCCATTCGTCCTCTCCGTTACGCAAGCTGCATCAGGGTGTCGCCCACCGCCACCGCATCCCCGGCTTTCACCGCGATGCCGCGTACCGTCCCGGCCTGCGCGGCGCGGATTTCGGTTTCCATTTTCATGGCTTCCAGAATCAGCAGCACATCGCCTTCGGCCACCGTCTGGCCTTCCGTCGCCACCACTTTCCAGATATTGCCCGCCAGCGGCGCGCTGACCGGGGTGCCCGCGCCCGCCGGGGCGGCGGCAGGCGCCGCGGTCTGAACCGGCGCAGAACTGGCCGTGGCCAGCTGGCTGATATCGCCGCCGTCGCTGACTTTCACCACAAAGGCTTTGCCTTCCACTTCGACGGTGTAGATACCGGAAGCGGCGGGTTTTGCGGCTTTTTCCTCTTTCACGGCAGGTTTCGCGGCTTCCGCCTGCGGCAGTGGCTCAAACGCCGCCGGGTTATGGCGGTTTTCGAGGAATTTCAGCCCGACCTGCGGGAACAGTGCCACGGTGAGGACGTCATCAATGGCGTTCTCTGCAAGCCGGATGCCTTTTTCTGCCGCCTGGCGTTTAATGTCCGCTTCCAGTTGCGCCAGTTCCGGTTTCAGTAAATCCGCCGGGCGGCAGGTGACGGCCTCTTCGCCGTCCAGCACTCTGGCCTGTAACGCCGCGTTGACCGGCGCTGGCGTGTGGCCGTATTCGCCCTTCAGTATCCCGGCGGTTTCTTTGGCGATGGTTT\n+>NODE_216_length_189_cov_23403.1_ID_431\n+CCTGATTCAGTTTTATCTTTCGTCGGTGAGCGACGGTACTTTCTGACCTCGGTTTTTCACTCGAACGCGATTCGTGAACCGTCATCGTCGCCCTCGGAAATATCATCACCGATCACCATGTCGGCGTCAGTTTCCTGATCGAGTTTCAGCACGTCTTTGAGTGCGCCGCCGGTCGCCACAAAGCGGCGC\n+>NODE_222_length_167_cov_18131.6_ID_443\n+CACTCAAAGACGTGCTGAAACTCGATCAGGAAACTGACGCCGACATGGTGATCGGTGATGATATTTCCGAGGGCGACGATGACG
GTTCACGAATCGCGTTCGAGTGAAAAACCGAGGTCAGAAAGTACCGTCGCTCCCCGACGAAAGATAAAACTGAATCAGGCTAG\n' |