Repository 'shasta'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/shasta

Changeset 0:60573349e9ae (2020-11-11)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/shasta commit 6d14992ea4fb1af09373d51b3a48166afcbd3a74"
added:
configs/Nanopore-Dec2019.conf
configs/Nanopore-Jun2020.conf
configs/Nanopore-OldGuppy-Sep2020.conf
configs/Nanopore-Sep2020.conf
configs/Nanopore-UL-Dec2019.conf
configs/Nanopore-UL-Jun2020.conf
configs/Nanopore-UL-Sep2020.conf
configs/Nanopore-UL-iterative-Sep2020.conf
configs/PacBio-CCS-Dec2019.conf
configs/PacBio-CLR-Dec2019.conf
macros.xml
shasta.xml
test-data/nanopore.fasta.gz
test-data/out_config.txt
test-data/out_config2.txt
test-data/out_fasta.fasta
test-data/out_gfa.gfa1
test-data/out_reads.csv
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-Dec2019.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-Dec2019.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,51 @@
+# This file contains Shasta options that, as of December 2019,
+# are known to work with Oxford Nanopore reads under the following 
+# circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.0.5 base caller. Also known to work with
+#   reads from other Guppy releases 3.0.x and 3.1.x.
+
+# To use this configuration file, specify Shasta option "--config PathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 10000
+
+[MinHash]
+minBucketSize = 5
+maxBucketSize = 30
+minFrequency = 5
+
+[Align]
+minAlignedFraction = 0.4
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.0.5-a
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-Jun2020.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-Jun2020.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,74 @@
+# DO NOT USE THIS FILE IF YOU HAVE READS CREATED BY A
+# GUPPY VERSION OLDER THAN 3.6.0.
+
+# This file contains Shasta options that, as of June 2020,
+# are known to work with Oxford Nanopore reads under the following 
+# circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.6.0 base caller. If you have reads
+#   from an older version of Guppy, use configuration file
+#   Nanopore-Dec2019.conf instead.
+
+# To use this configuration file, specify Shasta option 
+# "--config AbsolutePathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 10000
+noCache = True
+
+[Kmers]
+# Due to the higher accuracy of Guppy 3.6.0 we use longer
+# markers than usual.
+k = 14
+
+[MinHash]
+minBucketSize = 5
+maxBucketSize = 30
+minFrequency = 5
+
+[Align]
+alignMethod = 3
+downsamplingFactor = 0.05
+matchScore = 6
+minAlignedFraction = 0.55
+minAlignedMarkerCount = 400
+sameChannelReadAlignment.suppressDeltaThreshold = 30
+
+[MarkerGraph]
+simplifyMaxLength = 10,100,1000,10000,100000
+refineThreshold = 6
+crossEdgeCoverageThreshold = 3
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.6.0-a
+detangleMethod = 1
+
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-OldGuppy-Sep2020.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-OldGuppy-Sep2020.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,83 @@
+# This file contains Shasta options which attempt to partially automate
+# parameter selection. It is based on an earlier config, which, as of Jun 2020,
+# was known to work with Oxford Nanopore reads under the following circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.0.5 base caller. Also known to work with
+#   reads from other Guppy releases 3.0.x and 3.1.x.
+
+# The automation provided by this config is particularly applicable to
+# low coverage or non-human samples. It also matches or exceeds continuity
+# in human samples, relative to the appropriately chosen config file.
+# Updating to Guppy 3.6.0 or higher will greatly improve assembly
+# quality and is therefore strongly recommended.
+
+# To use this configuration file, specify Shasta option
+# "--config AbsolutePathToThisFile".
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 10000
+noCache = True
+
+[MinHash]
+minBucketSize = 5
+maxBucketSize = 30
+minFrequency = 5
+
+[Align]
+alignMethod = 3
+downsamplingFactor = 0.05
+matchScore = 6
+sameChannelReadAlignment.suppressDeltaThreshold = 30
+
+# The following Align parameters are set to very permissive values to allow the majority of alignments
+# to be assessed during the initial stage of automatic alignment parameter selection
+maxSkip = 100
+maxDrift = 100
+maxTrim = 100
+minAlignedMarkerCount = 10
+minAlignedFraction = 0.1
+
+[ReadGraph]
+# This method uses the observed distribution of alignment stats to choose a cutoff for
+# maxSkip, maxDrift, maxTrim, minAlignedMarkerCount, and minAlignedFraction
+creationMethod = 2
+
+[MarkerGraph]
+simplifyMaxLength = 10,100,1000,10000,100000
+crossEdgeCoverageThreshold = 3
+
+# Automatically determine this using PeakFinder
+minCoverage = 0
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.0.5-a
+detangleMethod = 2
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-Sep2020.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-Sep2020.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,94 @@
+# DO NOT USE THIS FILE IF YOU HAVE READS CREATED BY A
+# GUPPY VERSION OLDER THAN 3.6.0.
+
+# This file contains Shasta options which attempt to partially automate
+# parameter selection. It is based on an earlier config, which, as of Jun 2020,
+# was known to work with Oxford Nanopore reads under the following circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.6.0 base caller. If you have reads
+#   from an older version of Guppy, use configuration file
+#   Nanopore-Dec2019.conf instead.
+
+# The automation provided by this config is particularly applicable to
+# low coverage or non-human samples. It also matches or exceeds continuity
+# in human samples, relative to the appropriately chosen 3.6.0 or 3.6.0-UL conf.
+# Automation can also be activated with parameters designed for earlier basecallers,
+# if needed, but updating to Guppy 3.6.0 or higher will greatly improve assembly
+# quality and is therefore strongly recommended.
+
+# To use this configuration file, specify Shasta option 
+# "--config AbsolutePathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 10000
+noCache = True
+
+[Kmers]
+# Due to the higher accuracy of Guppy 3.6.0 we use longer
+# markers than usual.
+k = 14
+
+[MinHash]
+minBucketSize = 5
+maxBucketSize = 30
+minFrequency = 5
+
+[Align]
+alignMethod = 3
+downsamplingFactor = 0.05
+matchScore = 6
+sameChannelReadAlignment.suppressDeltaThreshold = 30
+
+# The following Align parameters are set to very permissive values to allow the majority of alignments
+# to be assessed during the initial stage of automatic alignment parameter selection
+maxSkip = 100
+maxDrift = 100
+maxTrim = 100
+minAlignedMarkerCount = 10
+minAlignedFraction = 0.1
+
+[ReadGraph]
+# This method uses the observed distribution of alignment stats to choose a cutoff for
+# maxSkip, maxDrift, maxTrim, minAlignedMarkerCount, and minAlignedFraction
+creationMethod = 2
+
+[MarkerGraph]
+simplifyMaxLength = 10,100,1000,10000,100000
+crossEdgeCoverageThreshold = 3
+
+# Automatically determine this using PeakFinder
+minCoverage = 0
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.6.0-a
+detangleMethod = 2
+
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-UL-Dec2019.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-UL-Dec2019.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,53 @@
+# This file contains Shasta options that, as of December 2019,
+# are known to work with Ultra-Long (UL) Oxford Nanopore reads 
+# under the following circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.0.5 base caller. Also known to work with
+#   reads from other Guppy releases 3.0.x and 3.1.x.
+
+# To use this configuration file, specify Shasta option "--config PathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 50000
+
+[MinHash]
+minBucketSize = 5
+maxBucketSize = 40
+minFrequency = 10
+
+[Align]
+maxSkip = 60
+maxDrift = 60
+minAlignedMarkerCount = 400
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.0.5-a
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-UL-Jun2020.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-UL-Jun2020.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,78 @@
+# DO NOT USE THIS FILE IF YOU HAVE READS CREATED BY A
+# GUPPY VERSION OLDER THAN 3.6.0.
+
+# This file contains Shasta options that, as of June 2020,
+# are known to work with Ultra-Long (UL) Oxford Nanopore reads 
+# under the following circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.6.0 base caller. If you have reads
+#   from an older version of Guppy, use configuration file
+#   Nanopore-UL-Dec2019.conf instead.
+
+# To use this configuration file, specify Shasta option 
+# "--config AbsolutePathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 50000
+noCache = True
+
+[Kmers]
+# Due to the higher accuracy of Guppy 3.6.0 we use longer
+# markers than usual.
+k = 14
+
+[MinHash]
+minBucketSize = 10
+maxBucketSize = 30
+minFrequency = 5
+
+[Align]
+alignMethod = 3
+downsamplingFactor = 0.05
+matchScore = 6
+minAlignedFraction = 0.55
+minAlignedMarkerCount = 600
+sameChannelReadAlignment.suppressDeltaThreshold = 30
+
+[ReadGraph]
+maxAlignmentCount = 12
+
+
+[MarkerGraph]
+simplifyMaxLength = 10,100,1000,10000,100000
+refineThreshold = 6
+crossEdgeCoverageThreshold = 3
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.6.0-a
+detangleMethod = 1
+
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-UL-Sep2020.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-UL-Sep2020.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,89 @@
+# DO NOT USE THIS FILE IF YOU HAVE READS CREATED BY A
+# GUPPY VERSION OLDER THAN 3.6.0.
+
+# This file contains Shasta options which attempt to partially automate
+# parameter selection. It is based on an earlier config, which, as of Jun 2020,
+# was known to work with Oxford Nanopore reads under the following circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+# - Reads from Guppy 3.6.0 base caller. If you have reads
+#   from an older version of Guppy, use configuration file
+#   Nanopore-UL-Dec2019.conf instead.
+
+# The automation provided by this config is particularly applicable to
+# low coverage or non-human samples. It also matches or exceeds continuity
+# in human samples, relative to the appropriately chosen 3.6.0 or 3.6.0-UL conf.
+# Automation can be activated with parameters designed for earlier basecallers,
+# if needed, but updating to Guppy 3.6.0 or higher will greatly improve assembly
+# quality and is therefore strongly recommended.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+
+[Reads]
+# If you have extra coverage, use this option to adjust coverage.
+minReadLength = 50000
+noCache = True
+
+[Kmers]
+# Due to the higher accuracy of Guppy 3.6.0 we use longer
+# markers than usual.
+k = 14
+
+[MinHash]
+minBucketSize = 10
+maxBucketSize = 30
+minFrequency = 5
+
+[Align]
+alignMethod = 3
+downsamplingFactor = 0.05
+matchScore = 6
+sameChannelReadAlignment.suppressDeltaThreshold = 30
+
+# The following Align parameters are set to very permissive values to allow the majority of alignments
+# to be assessed during the initial stage of automatic alignment parameter selection
+maxSkip = 100
+maxDrift = 100
+maxTrim = 100
+minAlignedMarkerCount = 10
+minAlignedFraction = 0.1
+
+[ReadGraph]
+# This method uses the observed distribution of alignment stats to choose a cutoff for
+# maxSkip, maxDrift, maxTrim, minAlignedMarkerCount, and minAlignedFraction
+creationMethod = 2
+
+[MarkerGraph]
+simplifyMaxLength = 10,100,1000,10000,100000
+crossEdgeCoverageThreshold = 3
+
+# Automatically determine this using PeakFinder
+minCoverage = 0
+
+[Assembly]
+consensusCaller = Bayesian:guppy-3.6.0-a
+detangleMethod = 2
+
+
b
diff -r 000000000000 -r 60573349e9ae configs/Nanopore-UL-iterative-Sep2020.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/Nanopore-UL-iterative-Sep2020.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,46 @@
+# This configuration file is EXPERIMENTAL and should only
+# be used under the following conditions:
+# - Nanopore reads created by Guppy 3.6.0 or newer.
+# - Ultra-Long (UL) reads with typical N50 80Kb or more.
+# - High coverage 80X.
+# Iterative assembly results in some separation of haplotypes
+# and does better at resolving segmental duplications.
+
+[Reads]
+minReadLength = 30000 
+noCache = True
+
+[Kmers]
+k = 10 
+
+[MinHash]
+minBucketSize = 10 
+maxBucketSize = 40 
+minFrequency = 5 
+
+[Align]
+alignMethod = 3 
+matchScore = 6
+gapScore = -3 
+downsamplingFactor = 0.05 
+maxSkip = 100
+maxDrift = 100
+maxTrim = 100
+minAlignedMarkerCount = 10
+minAlignedFraction = 0.1
+sameChannelReadAlignment.suppressDeltaThreshold = 30 
+
+[ReadGraph]
+maxAlignmentCount = 12
+creationMethod = 2
+
+[MarkerGraph]
+minCoveragePerStrand = 3
+simplifyMaxLength = 10,100
+crossEdgeCoverageThreshold = 3 
+
+[Assembly]
+detangleMethod = 2 
+consensusCaller = Bayesian:guppy-3.6.0-a
+iterative = True
+
b
diff -r 000000000000 -r 60573349e9ae configs/PacBio-CCS-Dec2019.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/PacBio-CCS-Dec2019.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,50 @@
+# This file contains Shasta options that, as of December 2019,
+# are known to work with PacBio CCS reads under the following 
+# circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+
+# To use this configuration file, specify Shasta option "--config PathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+[Kmers]
+k = 15
+probability = 0.02
+
+[MinHash]
+m = 12
+minBucketSize = 20 
+maxBucketSize = 100
+minHashIterationCount = 25 
+minFrequency = 10
+
+[ReadGraph]
+maxAlignmentCount = 20 
+
+[Assembly]
+consensusCaller = Modal
+
+
b
diff -r 000000000000 -r 60573349e9ae configs/PacBio-CLR-Dec2019.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/PacBio-CLR-Dec2019.conf Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,45 @@
+# This file contains Shasta options that, as of December 2019,
+# are known to work with PacBio CLR reads under the following 
+# circumstances:
+
+# - Human genome assembly.
+# - Coverage between 40x and 80x. If you have more coverage than that,
+#   you can use option "--Reads.minReadLength" to adjust coverage as desired.
+
+# To use this configuration file, specify Shasta option "--config PathToThisFile". 
+# If you specify any conflicting values on the command line,
+# the values specified on the command line take precedence.
+
+# In most cases, for best performance on a large assembly 
+# you will usually also want to use the following options, which 
+# cannot be specified in a configuration file:
+# --memoryMode filesystem
+# --memoryBacking 2M
+# Using these options requires root access.
+
+# For detailed information on all available options see here:
+# https://chanzuckerberg.github.io/shasta/CommandLineOptions.html
+
+# For information on running a small assembly for which 
+# performance is not essential see here:
+# https://chanzuckerberg.github.io/shasta/QuickStart.html
+
+# For more information on running an assembly see here:
+# https://chanzuckerberg.github.io/shasta/Running.html
+
+# For information on optimizing assembly performance see here:
+# https://chanzuckerberg.github.io/shasta/Performance.html
+
+
+[Reads]
+# Use this option to adjust coverage.
+minReadLength = 10000
+
+[ReadGraph]
+maxAlignmentCount = 20
+
+[Assembly]
+consensusCaller = Modal
+
+
+
b
diff -r 000000000000 -r 60573349e9ae macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,82 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">0.6.0</token>
+    <token name="@PROFILE@">18.01</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">shasta</requirement>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <version_command>shasta --version</version_command>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41587-020-0503-6</citation>
+            <citation type="bibtex">@online{shasta,
+              author = {TODOLastName,TODOFirstName},
+              title = {shasta},
+              year = 2020,
+              url = {https://github.com/chanzuckerberg/shasta},
+              urldate = {2020-11-05}
+            }</citation>
+        </citations>
+    </xml>
+    <!--
+        input
+    -->
+    <token name="@INIT_INPUT_READS@"><![CDATA[
+#for $counter, $input in enumerate($in_data)
+    #if $input.is_of_type("fasta.gz")
+        #set $ext = "fasta"
+        gzip -dcf '$input' > ./input_${counter}.${ext} && 
+    #elif $input.is_of_type("fastq.gz","fastqsanger.gz")
+        #set $ext = "fastq"
+        gzip -dcf '$input' > ./input_${counter}.${ext} && 
+    #elif $input.is_of_type("fastqsanger", "fastq"):
+        #set $ext = "fastq"
+        ### I tried symlinking but shasta still complained about file extensions
+        cp '$input' ./input_${counter}.${ext} &&
+    #elif $input.is_of_type("fasta"):
+        #set $ext = "fasta"
+        ### I tried symlinking but shasta still complained about file extensions
+        cp '$input' ./input_${counter}.${ext} &&
+    #end if
+#end for
+    ]]></token>
+
+    <xml name="input_reads">
+        <param name="in_data" type="data" format="fasta,fastq,fastqsanger,fasta.gz,fastq.gz,fastqsanger.gz" multiple="true" label="Input read file(s)"/>
+    </xml>
+
+    <xml name="config_selection">
+        <param name="config_select" type="select" label="Initial config file to build off of." help="Link to the directory where these config files are stored can be found in the tool help below.">
+            <option value="Nanopore-Dec2019.conf"></option>
+            <option value="Nanopore-Jun2020.conf"></option>
+            <option value="Nanopore-OldGuppy-Sep2020.conf"></option>
+            <option value="Nanopore-Sep2020.conf"></option>
+            <option value="Nanopore-UL-Dec2019.conf"></option>
+            <option value="Nanopore-UL-Jun2020.conf"></option>
+            <option value="Nanopore-UL-Sep2020.conf"></option>
+            <option value="Nanopore-UL-iterative-Sep2020.conf"></option>
+            <option value="PacBio-CCS-Dec2019.conf"></option>
+            <option value="PacBio-CLR-Dec2019.conf"></option>
+        </param>
+    </xml>
+    <!--
+        Help
+    -->
+
+    <token name="@WID@"><![CDATA[
+The goal of the *Shasta* long read assembler is to rapidly produce accurate assembled sequence using as input DNA reads generated by Oxford Nanopore flow cells.
+
+Computational methods used by the Shasta assembler include:
+
+Using a run-length representation of the read sequence. This makes the assembly process more resilient to errors in homopolymer repeat counts, which are the most common type of errors in Oxford Nanopore reads.
+
+Using in some phases of the computation a representation of the read sequence based on markers, a fixed subset of short k-mers (k ≈ 10).
+]]></token>
+    <token name="@REFERENCES@"><![CDATA[
+More information is available in the `documentation <https://chanzuckerberg.github.io/shasta/>`_ or on `GitHub <https://github.com/chanzuckerberg/shasta>`_.
+    ]]></token>
+</macros>
b
diff -r 000000000000 -r 60573349e9ae shasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shasta.xml Wed Nov 11 21:54:51 2020 +0000
[
b'@@ -0,0 +1,832 @@\n+<?xml version="1.0"?>\n+<tool id="shasta" name="Shasta" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">\n+    <description>De novo assembly of long read sequencing data</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    \n+    <expand macro="requirements"/>\n+\n+    <expand macro="version_command"/>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+### Initialize input reads (either unzip or symlink to have the right file extensions for shasta)\n+\n+@INIT_INPUT_READS@\n+\n+shasta --threads \\${GALAXY_SLOTS:-4}\n+\n+#if $use_config.use_config_select == "yes":\n+    --config \'${__tool_directory__}/configs/${use_config.config_select}\'\n+#elif $use_config.use_config_select == "file":\n+    --config \'$in_config\'\n+#end if\n+\n+--assemblyDirectory shasta_out\n+--command assemble\n+\n+######################\n+### Reads Options: ###\n+######################\n+\n+#if str($reads.min_read_length):\n+    --Reads.minReadLength \'${reads.min_read_length}\'\n+#end if\n+#if str($reads.desired_coverage):\n+    --Reads.desiredCoverage \'${reads.desired_coverage}\'\n+#end if\n+\n+#######################################\n+### Reads.palindromicReads Options: ###\n+#######################################\n+\n+#if $reads.palindromic_reads.skip_flagging:\n+    --Reads.palindromicReads.skipFlagging \'${reads.palindromic_reads.skip_flagging}\'\n+#end if\n+#if str($reads.palindromic_reads.max_skip):\n+    --Reads.palindromicReads.maxSkip \'${reads.palindromic_reads.max_skip}\'\n+#end if\n+#if str($reads.palindromic_reads.max_drift):\n+    --Reads.palindromicReads.maxDrift \'${reads.palindromic_reads.max_drift}\'\n+#end if\n+#if str($reads.palindromic_reads.max_marker_frequency):\n+    --Reads.palindromicReads.maxMarkerFrequency \'${reads.palindromic_reads.max_marker_frequency}\'\n+#end if\n+#if str($reads.palindromic_reads.aligned_fraction_threshold):\n+    --Reads.palindromicReads.alignedFractionThreshold 
\'${reads.palindromic_reads.aligned_fraction_threshold}\'\n+#end if\n+#if str($reads.palindromic_reads.near_diagonal_fraction_threshold):\n+    --Reads.palindromicReads.nearDiagonalFractionThreshold \'${reads.palindromic_reads.near_diagonal_fraction_threshold}\'\n+#end if\n+#if str($reads.palindromic_reads.delta_threshold):\n+    --Reads.palindromicReads.deltaThreshold \'${reads.palindromic_reads.delta_threshold}\'\n+#end if\n+\n+######################\n+### Kmers Options: ###\n+######################\n+\n+#if $kmers.generation_method:\n+    --Kmers.generationMethod \'${kmers.generation_method}\'\n+#end if\n+#if str($kmers.k):\n+    --Kmers.k \'${kmers.k}\'\n+#end if\n+#if str($kmers.probability):\n+    --Kmers.probability \'${kmers.probability}\'\n+#end if\n+#if str($kmers.enrichment_threshold):\n+    --Kmers.enrichmentThreshold \'${kmers.enrichment_threshold}\'\n+#end if\n+#if $kmers.file:\n+    --Kmers.file \'${kmers.file}\'\n+#end if\n+\n+########################\n+### MinHash Options: ###\n+########################\n+\n+#if $minhash.version:\n+    --MinHash.version \'${minhash.version}\'\n+#end if\n+#if str($minhash.m):\n+    --MinHash.m \'${minhash.m}\'\n+#end if\n+#if str($minhash.hash_fraction):\n+    --MinHash.hashFraction \'${minhash.hash_fraction}\'\n+#end if\n+#if str($minhash.min_hash_iteration_count):\n+    --MinHash.minHashIterationCount \'${minhash.min_hash_iteration_count}\'\n+#end if\n+#if str($minhash.alignment_candidates_per_read):\n+    --MinHash.alignmentCandidatesPerRead \'${minhash.alignment_candidates_per_read}\'\n+#end if\n+#if str($minhash.min_bucket_size):\n+    --MinHash.minBucketSize \'${minhash.min_bucket_size}\'\n+#end if\n+#if str($minhash.max_bucket_size):\n+    --MinHash.maxBucketSize \'${minhash.max_bucket_size}\'\n+#end if\n+#if str($minhash.min_frequency):\n+    --MinHash.minFrequency \'${minhash.min_frequency}\'\n+#end if\n+#if $minhash.all_pairs:\n+    ${minhash.all_pairs}\n+#end if\n+\n+######################\n+### Align 
Options: ###\n+######################\n+\n+#if $align.align_method:\n+    --Align.alignMethod \'${align.align_method}\'\n+#end if\n+#if str($align.max_skip):\n+    --Align.maxSkip \'${align.max'..b'<param name="min_coverage" value="10"/>\n+                <param name="max_coverage" value="100"/>\n+                <param name="min_coverage_per_strand" value="0"/>\n+                <param name="low_coverage_threshold" value="0"/>\n+                <param name="high_coverage_threshold" value="256"/>\n+                <param name="max_distance" value="30"/>\n+                <param name="edge_marker_skip_threshold" value="100"/>\n+                <param name="prune_iteration_count" value="6"/>\n+                <param name="simplifiy_max_length" value="10,100,1000"/>\n+                <param name="cross_edge_coverage_threshold" value="0"/>\n+                <param name="refine_threshold" value="0"/>\n+                <param name="reverse_transitive_reduction" value=""/>\n+                <section name="peak_finder">\n+                    <param name="min_area_fraction" value="0.08"/>\n+                    <param name="area_start_index" value="2"/>\n+                </section>\n+            </section>\n+            <section name="assembly">\n+                <param name="cross_edge_coverage_threshold" value="3"/>\n+                <param name="marker_graph_edge_length_threshold_for_consensus" value="1000"/>\n+                <conditional name="consensus_caller">\n+                    <param name="consensus_caller_select" value="Bayesian:guppy-3.0.5-a"/>\n+                </conditional>\n+                <!-- The below option only can be meaningfully set with memoryMode set to filesystem, which requires root access. 
Therefore, removing this param-->\n+                <!-- <param name="store_coverage_data_csv_length_threshold" value="0"/> -->\n+                <param name="write_reads_by_assembled_segment" value="false"/>\n+                <section name="detangle">\n+                    <param name="detangle_method" value="0"/>\n+                    <param name="diagonal_read_count_min" value="1"/>\n+                    <param name="off_diagonal_read_count_max" value="2"/>\n+                    <param name="off_diagonal_ratio" value="0.3"/>\n+                </section>\n+                <section name="iterative">\n+                    <param name="iterative" value=""/>\n+                    <param name="iteration_count" value="3"/>\n+                    <param name="pseudo_path_align_match_score" value="1"/>\n+                    <param name="pseudo_path_align_mismatch_score" value="-1"/>\n+                    <param name="pseudo_path_align_gap_score" value="-1"/>\n+                    <param name="mismatch_square_factor" value="3"/>\n+                    <param name="min_score" value="0"/>\n+                    <param name="max_alignment_count" value="6"/>\n+                    <param name="bridge_removal_iteration_count" value="3"/>\n+                    <param name="bridge_removal_max_distance" value="2"/>\n+                </section>\n+            </section>\n+            <output name="out_fasta" file="out_fasta.fasta">\n+                <assert_contents>\n+                    <has_n_lines n="2"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_conf" file="out_config2.txt">\n+                <assert_contents>\n+                    <has_n_lines n="100"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+.. 
class:: infomark\n+\n+**What it does**\n+\n+@WID@\n+\n+**Input**\n+\n+- fasta / fastq data\n+\n+- config file (optional)\n+\n+\n+Example config files can be found `here <https://github.com/chanzuckerberg/shasta/tree/master/conf>`_ (these files correspond to the packaged config files available for this tool).\n+\n+**Output**\n+\n+- Assembled genome (FASTA)\n+- Graphical fragment assembly (optional) (.gfa1)\n+- Log file (optional) (.txt)\n+- Config file (optional) (.txt)\n+- Read CSV detailing which reads align to which assembly fragments (optional) (.csv)\n+- Coverage CSV List (optional) (list:.csv)\n+\n+**References**\n+\n+@REFERENCES@\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 60573349e9ae test-data/nanopore.fasta.gz
b
Binary file test-data/nanopore.fasta.gz has changed
b
diff -r 000000000000 -r 60573349e9ae test-data/out_config.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_config.txt Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,100 @@
+[Reads]
+minReadLength = 10000
+desiredCoverage = 0
+noCache = False
+palindromicReads.skipFlagging = False
+palindromicReads.maxSkip = 100
+palindromicReads.maxDrift = 100
+palindromicReads.maxMarkerFrequency = 10
+palindromicReads.alignedFractionThreshold = 0.1
+palindromicReads.nearDiagonalFractionThreshold = 0.1
+palindromicReads.deltaThreshold = 100
+
+[Kmers]
+generationMethod = 0
+k = 10
+probability = 0.1
+enrichmentThreshold = 100
+file = 
+
+[MinHash]
+version = 0
+m = 4
+hashFraction = 0.01
+minHashIterationCount = 10
+alignmentCandidatesPerRead = 20
+minBucketSize = 5
+maxBucketSize = 30
+minFrequency = 5
+allPairs = False
+
+[Align]
+alignMethod = 3
+maxSkip = 30
+maxDrift = 30
+maxTrim = 30
+maxMarkerFrequency = 10
+minAlignedMarkerCount = 100
+minAlignedFraction = 0.4
+matchScore = 6
+mismatchScore = -1
+gapScore = -1
+downsamplingFactor = 0.1
+bandExtend = 10
+maxBand = 1000
+sameChannelReadAlignment.suppressDeltaThreshold = 0
+suppressContainments = False
+
+[ReadGraph]
+creationMethod = 0
+maxAlignmentCount = 6
+minComponentSize = 100
+maxChimericReadDistance = 2
+crossStrandMaxDistance = 6
+containedNeighborCount = 6
+uncontainedNeighborCountPerDirection = 3
+removeConflicts = False
+markerCountPercentile = 0.015
+alignedFractionPercentile = 0.12
+maxSkipPercentile = 0.12
+maxDriftPercentile = 0.12
+maxTrimPercentile = 0.015
+
+[MarkerGraph]
+minCoverage = 10
+maxCoverage = 100
+minCoveragePerStrand = 0
+lowCoverageThreshold = 0
+highCoverageThreshold = 256
+maxDistance = 30
+edgeMarkerSkipThreshold = 100
+pruneIterationCount = 6
+simplifyMaxLength = 10,100,1000
+crossEdgeCoverageThreshold = 0
+refineThreshold = 0
+reverseTransitiveReduction = False
+peakFinder.minAreaFraction = 0.08
+peakFinder.areaStartIndex = 2
+
+[Assembly]
+crossEdgeCoverageThreshold = 3
+markerGraphEdgeLengthThresholdForConsensus = 1000
+consensusCaller = Bayesian:guppy-3.0.5-a
+storeCoverageData = False
+storeCoverageDataCsvLengthThreshold = 0
+writeReadsByAssembledSegment = True
+detangleMethod = 0
+detangle.diagonalReadCountMin = 1
+detangle.offDiagonalReadCountMax = 2
+detangle.offDiagonalRatio = 0.3
+iterative = False
+iterative.iterationCount = 3
+iterative.pseudoPathAlignMatchScore = 1
+iterative.pseudoPathAlignMismatchScore = -1
+iterative.pseudoPathAlignGapScore = -1
+iterative.mismatchSquareFactor = 3
+iterative.minScore = 0
+iterative.maxAlignmentCount = 6
+iterative.bridgeRemovalIterationCount = 3
+iterative.bridgeRemovalMaxDistance = 2
+
b
diff -r 000000000000 -r 60573349e9ae test-data/out_config2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_config2.txt Wed Nov 11 21:54:51 2020 +0000
[
@@ -0,0 +1,100 @@
+[Reads]
+minReadLength = 10000
+desiredCoverage = 0
+noCache = False
+palindromicReads.skipFlagging = False
+palindromicReads.maxSkip = 100
+palindromicReads.maxDrift = 100
+palindromicReads.maxMarkerFrequency = 10
+palindromicReads.alignedFractionThreshold = 0.1
+palindromicReads.nearDiagonalFractionThreshold = 0.1
+palindromicReads.deltaThreshold = 100
+
+[Kmers]
+generationMethod = 0
+k = 10
+probability = 0.1
+enrichmentThreshold = 100
+file = 
+
+[MinHash]
+version = 0
+m = 4
+hashFraction = 0.01
+minHashIterationCount = 10
+alignmentCandidatesPerRead = 20
+minBucketSize = 5
+maxBucketSize = 30
+minFrequency = 5
+allPairs = False
+
+[Align]
+alignMethod = 3
+maxSkip = 30
+maxDrift = 30
+maxTrim = 30
+maxMarkerFrequency = 10
+minAlignedMarkerCount = 100
+minAlignedFraction = 0.4
+matchScore = 6
+mismatchScore = -1
+gapScore = -1
+downsamplingFactor = 0.1
+bandExtend = 10
+maxBand = 1000
+sameChannelReadAlignment.suppressDeltaThreshold = 0
+suppressContainments = False
+
+[ReadGraph]
+creationMethod = 0
+maxAlignmentCount = 6
+minComponentSize = 100
+maxChimericReadDistance = 2
+crossStrandMaxDistance = 6
+containedNeighborCount = 6
+uncontainedNeighborCountPerDirection = 3
+removeConflicts = False
+markerCountPercentile = 0.015
+alignedFractionPercentile = 0.12
+maxSkipPercentile = 0.12
+maxDriftPercentile = 0.12
+maxTrimPercentile = 0.015
+
+[MarkerGraph]
+minCoverage = 10
+maxCoverage = 100
+minCoveragePerStrand = 0
+lowCoverageThreshold = 0
+highCoverageThreshold = 256
+maxDistance = 30
+edgeMarkerSkipThreshold = 100
+pruneIterationCount = 6
+simplifyMaxLength = 10,100,1000
+crossEdgeCoverageThreshold = 0
+refineThreshold = 0
+reverseTransitiveReduction = False
+peakFinder.minAreaFraction = 0.08
+peakFinder.areaStartIndex = 2
+
+[Assembly]
+crossEdgeCoverageThreshold = 3
+markerGraphEdgeLengthThresholdForConsensus = 1000
+consensusCaller = Bayesian:guppy-3.0.5-a
+storeCoverageData = False
+storeCoverageDataCsvLengthThreshold = 0
+writeReadsByAssembledSegment = False
+detangleMethod = 0
+detangle.diagonalReadCountMin = 1
+detangle.offDiagonalReadCountMax = 2
+detangle.offDiagonalRatio = 0.3
+iterative = False
+iterative.iterationCount = 3
+iterative.pseudoPathAlignMatchScore = 1
+iterative.pseudoPathAlignMismatchScore = -1
+iterative.pseudoPathAlignGapScore = -1
+iterative.mismatchSquareFactor = 3
+iterative.minScore = 0
+iterative.maxAlignmentCount = 6
+iterative.bridgeRemovalIterationCount = 3
+iterative.bridgeRemovalMaxDistance = 2
+
b
diff -r 000000000000 -r 60573349e9ae test-data/out_fasta.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_fasta.fasta Wed Nov 11 21:54:51 2020 +0000
b
b'@@ -0,0 +1,2 @@\n+>0 length 16594\n+TCACCTTTCTGCGTTCTCCGGATTTTTTCGCACGTGACTGCCCAGTTCGCTAAGGCTCGATTCCGATCTTAGCTTCCGAAGCATCTCTGCGTACGAAGCCTCCTCCTGTCTTTTTTTTTACGATGAGTGCCTCTGCACCCGTGCTAGCATTCTTGGCGACAGATGCATAGCTGCCTCGGCTTGGACTCGTTAGGTATGGATGTCGCCTTTTCTTCACCGCTTGCCATCCAACGTAGGGTTGCTCTGCTTCGGCGCCTGGACTTTGGCACCTGCACCGTGGGTACCTGCGCATTTTGTGGAGCCTTTTGGTAGGCTTTGTACCTCCGTCTGGGTAGCGCTGCTCTTCAAGCGGTGCTCGCTAGTAGTACCTTCTTGTACAGCTCTATCGCCCCTGTTGTTAAGGGCTGAGAGTTCGGAAGCTATGCCCTTTGTCTCTCATGTGTGTCGTTGGGTCTCGAATGCGGATAGAAGGGGGGCTGATCTTGCCATTCATCCTGGTAAGTATGCCCCTCAGGTCCTCGTTCTGGAGAGCAGTGGTCAAGTCCGAAGACCGATGAGGGGAACCAGCTGGGGACTTCCCTCCCGTCTTTTGCGATGGCACGGCGATGTGGTCCATCGTCTTCTGACTTGTCGCGGTCGTAGATGCTGTAGGAGCCAATTTCTCCACCTCTACAGCATCGCTGCAGTCGATAGTCAACGGCGGCTGGGGCCTGCGTGCTCACCCCCTTTGCCACCGACGGGACCGAGATTCCGTCACCATGGGTCGGGTTACCGACTCCTCTGGTGGGGGATCTCGAAAGTCGGGAGCTCCTCCTGAACGGGTCGTTGCGATCAGGGGACTGCCCCCACTGCGCTGTCCATCAACCGAAGGATGGCCTAAGGTTACCTAGGGTCACGTAGGCCACGGATTTCCCCGGCTTTTACGGATGGAGGTTTGTACCGCCTCGACTCGGACACCAGAGCCTCTGCTTAGGCACCAGCACGGTGAGAGGATGATATTTGGTCTGTCCGCGACCTTTGTGCCTTTTCCTTTTCCCCTTTCTTAAAGACTTGTGCGCTCCCCAGTTACCAGAATCAAAATTAAATTAAATTAAAAAATTTCAACGGCTTAATTTTTAAATTTAATTTTGAATTTCGCGCCCTACTGTCGCAACACAACACTGATCACACGGTTGATCTGGCAACGCCCCTCCCTCTCCAGATCGTCCCGCTGCGCTATCAGCTGTTTATCCGCTGCAACAATCGCAAGATAATAAATTACCTATTTACATGAAACAGGCGCACAATTAACTTCGCGTGATTGCAGCGCAGAACTATGGCGCGACTCCACCGTGTGATTTTGCCAGTTTGCCGCACACACCGGCCGACACAACTTCTATGGTTTCGAGTATTTCGCGCCGCGTAATTTAAGACGTCCGTCTTCACGGCGATGTAGACTTGGTCTCCGCACCAGGATTGAGTGTCTAAGCCTGTGCGTTCAGGATTAAACTTTGGCAATACGACAGCATTGAGTTTTTGGGTCTCTTTGTGAGTTTTGCACTTCTTTTAACAGTTCAGAAAAATGGCGATTCTGCGACTCTAGAATTGTCAGCAAATTGTTTTCACTATTGTAGTATGCACACATATCGGTTAAGCACTGTACCTATGTGTATTCATAATTGTAGCACTCTGTTGCAATGTGTACATAAACAGTCCAGCACAAGTGTATACGTAATACGAGCACCCACTTGAGGGGCTAACATACATAATTGCCTGGTCAGCAGTTCTCCCGCTGGCCAAGCGCTGATCGCTTATATGTGTGCCTATACCACTCCTCCTCCAGCACCATCGCTCGGCGACAGTACATATATATATACATATATATAAGATACGCATATAAGCGTTGCTGCGCTGCAGGCTATGCCGGCAGCGCTGCTCCTCGACTTAGGCTAAGAAACCTTTGTATGTAGATCATAAGAGGAT
TGACGAATTAATTTCGAATGGAGCGGGACCTTCCCTGTTCCTCAAATTGGATCAAATAAAGAACAGCTAAAACACAAATGATCTATGACACAATGAAACTACAGGAAGTTGGAGTAGGCGGTCCCACTTCTGATGTCGGATCTGAATCACTTTATACTCGGCATCACTCTCTACAGCGGACACAAAGTAACAGAAAACGGTCGTTAACTTGAAATTAAAAATAAATCTGCGACAAGTACAGTTTGTCAAGAAACTGTTTACACACCGTGAAATAAGATGAATTTTTGATTTTAACGGTCAAAATTAAGGGTTATTTGCTTAATTAAACGCAATTTTTTTTTTAGGAAATATAATTAAACAATATTTATTTTACTTATAAATTAAAAAACAAATTCAAAATATCAAATATACAAGAAAATAAATAACAAACTTCCAAGTTTACACACTTTTGAGAGTGTCACTCTGCCCAAACCGTCATCTCTGGGATGAGCTGATAGGTCCACCTCCCCCCGTCGCGACGTTTGCCATCTGCTCTGCCATTCACCTAATAGCCACTCGTGTGCCTCTTGCCTGGAAGCGCCACTCCGCATAAGGCTGAGAGCCTTGATCTCCAGATCAATCGGCAGGCCTGCCAGCGCTAGCGCCGCGTCTTCGGATATGGTCCTGAATCCTCTAATGAGCCTGAGGGCCATTGACCGTAGCACCGAACGAGCTCCTTTCAGGTATGAGACCCTGCAAGTGGCATTGCTCCAGACTGGTGCAGCGTATAGCAGCGAAGCCTTTGCTACTGACACCAGCAGTTTCCTGGCCGGGTGTCTTGGGCCTCCGACGTTGGGCATGCCTCGCCAACGAAGAGGATGTGACTGCTGTCTTCTTGCTGGCGTAGCTCGCGTGGTCCTTGAACGATAGTCTGCGATCTATCATTACCCCCAGGTACTTTAGGGACTCTTGAGAGGTCCCCTGAGTACCCTTGACGGAGACCAGCATGTTCTCCATCTTTTCCTGCTGCTTAGTAGGACTGCTTCGGTCTTGTGAGCCGCTATTGCTAGCCCGGCTGTGATTGCCACATCGTCAGCGAAACAGCTGCCTCGACGACTTCAAGTTCTGTGACGCACGGGAAATCTGGGGCAGGGGCTGCCTCTGTTGGCCTCCATAGTGTGGTCTGCTTTGGAAATAGCTCCCCTAGTATGTTAGCCAGGACCCCCGGGTCGGATGGGGTTGCCGCTCCTCCTCCTCCTTAGCTGCTTGGTAACAAGCTTGTAGGCGAGGCCCCAGATATCGCTGTCTACGCCATCCTGCAGCTCCTTAAACGACCGCGCTTTGGCAGCCGCGATGCCGTGCTTGAACTCTAGGCGTTTGCTTCTGAAAGCCTCCAAGAGTTCCGCGTGGTGGAGTACTGCCTCTGGCTCGTTGCGCCGTTCTCTTAGCCCTGAGGCAATCAGACCGTAGTTGGCTTAGGGAGGCACTCCACCAGTAAACGGGTGGTTTGCGCTGTGCCTTATTTTTTCCTTGGCATGATTGCGTCGCAGATTCTTCCCAGCATATTCATGAGGCCTGCCGCCATACTCTCTGCGTCCACATTTGGGATTTCCAGGGGAATTGATCTGATAGGCCAGCATGGCCTCATCGATCTTCCTGGTGTCCCATGTTTTCCCGGCTGTTCTACTCTGCCGTCTTTTGGGCATGTCCCTCCGGGGAGAAGGAGATCAGGGCATGGTCGCTCAGCGTCATGACGTCATGGACCATCCAGTTGTTGTTGTCTACTAGCCCTCTGCTGACAAAGGTAACGTCAATAAAGGACGTACCCCTATCGTTGTTAAACGTCGGCTTCCGTCCATCGTTCAGCAGTATGAGGTCCAGCATTCGCATGGCGTCAATCACAGCTCGTCCTCTGGTGTTGGATGTCCTGCTGCCCCATTCCACTGTCCAGGCATTAAAGTCGCCGGTATGACCTTCGGGCTTGCCCTCTCGCATGGTCCACGAGCGCCTCCAGAAGCCCT
CGA'..b'GATTGAGATCACACCAACCAACAAAACAAAACGTCTTCGATTTGCGTTGGAATATGTTAAGAAGCCTCTTGACTTTTGGTTTAATATTTTATGGACTGATGAGTCTGCATTTCAGTACCAGGGGTCATACAGCAAGCATTTTATGCATTTGAAAAATAATCAAAAGCATTTGGCAGCCCAACCAACCAATAGATTTGGTGGTGGGGGCACAGTCATGTTTTGGGGATGTCTTTCCTATTATGGATTCGGAGACTTGGTACCGATAGAAGGTACTTTAAATCAGAACGGATACCTTCTTATCTTAAACAACCATGCTTTTACGTCTGGAAATAGACTTTTTCCAACTACTGAATGGATTCTTCAGCAGGACAATGCTCCATGCCATAAGGGTAGGATACCAACAAAATTTTTAAACGACCTTAATCTGGCGGTTCTTCCGTGGCCCCCCCCCAAAGCCCAGACCTTAATATCATTGAAAACGTTTGGGCTTTTATTAAAACCAACGAACTATTGATAAAAATAGAAAACGAGAGGGAGCCATCATTGAAATAGCGGAGATTTGGTCCAAATTGACATTAGAATTTGCACAAACTTTGGTAAGGTCAATACCAAAAGACTTCAAGCAGTTATTGATGCCAAAGGTGGTGTTACAAAATATTAGTATTGTATTTATATAAAATAAAGAAATTCTTATGTTGAAATTAGATGTTAAGCTGAAATTTACTAAATTAAGTTGAGTGAAAATACTTTTGAAGCGCAATAAACATGTGAAAATACTATTGACAACTTGCATGCATATTTTCTTTTGCTTTTAAGCTTTGTACTATGAACCGTTATCTTTCGTATTTCTTTTCGACTACCTTCTGCATAGATCAAGCTAAGCGATAAGAACTATTTCAGGCAAATCGGACAACAACAAGAAGAAATATAACAAAAGAAGTTGAAGTTTGCAAATATTGTGCGTTGTGAAAATACTTTGACCACCTCTGTATATAGTTTGCAGGAGCACGGTATTGAACTTAAATGTACTGATGAGGAAATCAAACGCTACATTGGCATTTTATTGTACTTTGGTGTTTTAAAACTACCGCAATTCAGAATGGCGTGGTCAAAGTATTTAAAGATTACCGCAATAACTGATTCAATGCCGCGTGGGAGATTTAAAAAAATAAAACAATGCTTACATTTCAACGACAACGCCAAACAATTAAAAAAAGGGGATTGCAACTATGATAAACTCTACAAGATCCGCCCTTTGTTCAGAATTCTCAAAGAAAATTTTGGAAAAACTAACGCAGGAAGAGCATCAAAGTGTCGATGAGCAAATAATTGCATTCAAAGGTACGTTTTAATTTTCTTTTAAATTAGCTTTATTTTTTAATAATTGCTTTTGTTGCAGGTCGATCCACGCTTAGGCAATATAATCCAAACCTCATAAATGGGGTCTTAAAATGTTTACGCGGGCTGGAATATCTGGATTAGTTTATGATTTTATTGCTATATGTTGGAGAAGGCACTTCTCCTTCTTATGGCTTGGGAATATCATCTTATGTTGTCTTATATTTGGCAGAAAGTCTTCCCAAAGACAAAATTTTAAACTGTATTTTGATAATTGGTTTACGTCTGTAATCCTTCTGATTTCGTTGAAGGAAATAGGAATCTTTGCAACAGGTACCTGTACGTATGATAAAGTTGAACATTGGGTAGTTTTTTGGAGAAAGAGGACGTTGCAGACTGTGCAAAATTGCAACACCGATGACCAAATGCCTTACATGCAAAGTCCATCTGTGCTGCAATAACAATAAAAACTGTTTTTTGTCATACCACACTTAAATTGTCATTATAAAGAAAAATATTTCATATTCTGTGATTTATAAAAAAAAAACAATGCTTACACATCACTACTGCCCGACGTTGCTCACAAGAAAACTTTTGCTACCGCCCAAACTAATGGGCGTGGCATACCTAAAATTTTGCTAAATTTTTTCTAAA
ATAAATGTAAAACATTAATGATAAAACAAAATTTCACGGGTAAAAAAGTTGGGCGCGAAAGGGTTAAAAGACTTCTGTAGCATACTTTGGAATCCCAGAATCTCACATGCGAATAATGGAAAGTTTTGATCTAAGCTCAGATCATTCTCTAATAATAGTGACATACAGTACAGTAGCTCATATATTGCAAATACAGATATCAATGCATTTAAAAGTTATCTGGAAACAGCTATCAGCCTGGATATCTCGCTAAAATCAGGAGAGGAGAGCTACTGTGGAGCTACTCACAAACAAGATCCATAAAGCAAGCTATATATGTACGAAGCTACCAGCCAGAAACTCACAATCAAATCAGCTCTATCTCTCAGCTGAAACTCCGACAACAAATACAACACAAGAGAAATTTGCGTAAGAGATGGCAAGAAACTCTCTACCCTGCCGACAAAAGATCGTATAACAAGGCTGCATCTGATCGCAGAAAACTACTGTCAACTTTAAGAAATTAATCTCTCGCTGAATATCTTAGAAATCTAGATCCACATTCTTGTAACCACGAACATAATTTATGGAGTAACCAAATATCTCAAGCGACCTGCAAAAGAAACACAGTAGTCCGAAACTCTAATGGCGAATGGCGTAGATCTGATGATGAACAAGCCAAAGCATTTGCTTAACACCTGCACTCTGTATTTCAGCCAAATGATATTGATAACCCGCAAACAGAAAGGGAAGTAGATAACTTTTTTCGAGTCACCGCCAAATGAGCTTACCCATTCGTAAAATCAGTATTAATGAAGTTTCATCAGAAATGGCTAAAAAGTAAAAAGGCTCCAGATTGGGACAAAATAGATGGCATAGCCTTGAAAAAAAAAATGGGCGGTTAGAGTTGACCACTTCCCAAGCCAATGGAAATGTGCAGAAATTATAGGAATCCTTAAACCAAACAAGGCAGAAAATGAAGTGACATTGTACCGTCCCATTAGTTTGTTGTCAATATTTTCTAAAGTATTTAAAAAAAATAATTTTAAAGAGAATGTTGTCAATCTTGGAAGAATTGCTATCATACCCAAACACCAGTTTGGATTCGAAGAGGCCACGGAACCCCTGAGCAATGTCACAGGATTAAAAAATGAAATTTCGTCAGCATTTGAGAGCAAAAAATACTGCACTGCTAAATTTCTTGACGTTTAACAAGCGTTTGATCGAGTCTGGCATGATGGCTTATCATATAAAACCATATTTTATATTATTAAAGTCATACTTAACCAATAGACAATTTTATTTGCAACAAAAAAATGAATACTCGCCCTTGCACTTTATAAAAGCTGGAGTCCCACAAGGAAGAGTCTTAGTAACTGTCTTATACACCCTGTAACGGCAGATATGCCGGTAACAAATACCGCCTGTAACAAATGATACAGCTATATTAGCTACAAGCTCATCTAAAGAGGAAGCCTCACAACTCCTGCAAGCAGAGCTACGCCTTATTGAAAGCTGGTTTCTTATTTGGAAAATTAAAGTCAACGCCCTGAAATCTGCGCAAATAACTTTTGCATTAAGAAGAGGTAACTGCCCAGAAGTGTCATTTAATGGATCAGCAATCCCACAAAGTATCGATCGCAGACTAACGTGGAAACACATAAACATAAAAGCAAAGCGCCAGCAACTAAATCAAAAAAGTTTGAAGATGACCTGGTTGCTTGGCCGAAAATCTGCAACCACTCGGAAAATAAAGTCCGTTTATACAAAGCTATACTAAAGCCCATGTGGACTTATGACATACAGCTTTGGGGTACTGCCAGCAACTCAAATATTGAGATTCTACAATGCTACCAATCAAAAAATATTAAGACAAATTGTTAATGCTCCATTTTATATTTCAAATGCAAGTATCTATAAAGACTTAGGAATCCCTTATGTTAAAGAAGAAATAGCAAAACATATTAAAAATATATAGACAGACCAAGAACACATGAAAATAACTTAGCCTTAAATTTGGT
AAATAA\n'
b
diff -r 000000000000 -r 60573349e9ae test-data/out_gfa.gfa1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_gfa.gfa1 Wed Nov 11 21:54:51 2020 +0000
b
b'@@ -0,0 +1,2 @@\n+H\tVN:Z:1.0\n+S\t0\tTCACCTTTCTGCGTTCTCCGGATTTTTTCGCACGTGACTGCCCAGTTCGCTAAGGCTCGATTCCGATCTTAGCTTCCGAAGCATCTCTGCGTACGAAGCCTCCTCCTGTCTTTTTTTTTACGATGAGTGCCTCTGCACCCGTGCTAGCATTCTTGGCGACAGATGCATAGCTGCCTCGGCTTGGACTCGTTAGGTATGGATGTCGCCTTTTCTTCACCGCTTGCCATCCAACGTAGGGTTGCTCTGCTTCGGCGCCTGGACTTTGGCACCTGCACCGTGGGTACCTGCGCATTTTGTGGAGCCTTTTGGTAGGCTTTGTACCTCCGTCTGGGTAGCGCTGCTCTTCAAGCGGTGCTCGCTAGTAGTACCTTCTTGTACAGCTCTATCGCCCCTGTTGTTAAGGGCTGAGAGTTCGGAAGCTATGCCCTTTGTCTCTCATGTGTGTCGTTGGGTCTCGAATGCGGATAGAAGGGGGGCTGATCTTGCCATTCATCCTGGTAAGTATGCCCCTCAGGTCCTCGTTCTGGAGAGCAGTGGTCAAGTCCGAAGACCGATGAGGGGAACCAGCTGGGGACTTCCCTCCCGTCTTTTGCGATGGCACGGCGATGTGGTCCATCGTCTTCTGACTTGTCGCGGTCGTAGATGCTGTAGGAGCCAATTTCTCCACCTCTACAGCATCGCTGCAGTCGATAGTCAACGGCGGCTGGGGCCTGCGTGCTCACCCCCTTTGCCACCGACGGGACCGAGATTCCGTCACCATGGGTCGGGTTACCGACTCCTCTGGTGGGGGATCTCGAAAGTCGGGAGCTCCTCCTGAACGGGTCGTTGCGATCAGGGGACTGCCCCCACTGCGCTGTCCATCAACCGAAGGATGGCCTAAGGTTACCTAGGGTCACGTAGGCCACGGATTTCCCCGGCTTTTACGGATGGAGGTTTGTACCGCCTCGACTCGGACACCAGAGCCTCTGCTTAGGCACCAGCACGGTGAGAGGATGATATTTGGTCTGTCCGCGACCTTTGTGCCTTTTCCTTTTCCCCTTTCTTAAAGACTTGTGCGCTCCCCAGTTACCAGAATCAAAATTAAATTAAATTAAAAAATTTCAACGGCTTAATTTTTAAATTTAATTTTGAATTTCGCGCCCTACTGTCGCAACACAACACTGATCACACGGTTGATCTGGCAACGCCCCTCCCTCTCCAGATCGTCCCGCTGCGCTATCAGCTGTTTATCCGCTGCAACAATCGCAAGATAATAAATTACCTATTTACATGAAACAGGCGCACAATTAACTTCGCGTGATTGCAGCGCAGAACTATGGCGCGACTCCACCGTGTGATTTTGCCAGTTTGCCGCACACACCGGCCGACACAACTTCTATGGTTTCGAGTATTTCGCGCCGCGTAATTTAAGACGTCCGTCTTCACGGCGATGTAGACTTGGTCTCCGCACCAGGATTGAGTGTCTAAGCCTGTGCGTTCAGGATTAAACTTTGGCAATACGACAGCATTGAGTTTTTGGGTCTCTTTGTGAGTTTTGCACTTCTTTTAACAGTTCAGAAAAATGGCGATTCTGCGACTCTAGAATTGTCAGCAAATTGTTTTCACTATTGTAGTATGCACACATATCGGTTAAGCACTGTACCTATGTGTATTCATAATTGTAGCACTCTGTTGCAATGTGTACATAAACAGTCCAGCACAAGTGTATACGTAATACGAGCACCCACTTGAGGGGCTAACATACATAATTGCCTGGTCAGCAGTTCTCCCGCTGGCCAAGCGCTGATCGCTTATATGTGTGCCTATACCACTCCTCCTCCAGCACCATCGCTCGGCGACAGTACATATATATATACATATATATAAGATACGCATATAAGCGTTGCTGCGCTGCAGGCTATGCCGGCAGCGCTGCTCCTCGACTTAGGCTAAGAAACCTTTGTATGTAGATCATAAGAGG
ATTGACGAATTAATTTCGAATGGAGCGGGACCTTCCCTGTTCCTCAAATTGGATCAAATAAAGAACAGCTAAAACACAAATGATCTATGACACAATGAAACTACAGGAAGTTGGAGTAGGCGGTCCCACTTCTGATGTCGGATCTGAATCACTTTATACTCGGCATCACTCTCTACAGCGGACACAAAGTAACAGAAAACGGTCGTTAACTTGAAATTAAAAATAAATCTGCGACAAGTACAGTTTGTCAAGAAACTGTTTACACACCGTGAAATAAGATGAATTTTTGATTTTAACGGTCAAAATTAAGGGTTATTTGCTTAATTAAACGCAATTTTTTTTTTAGGAAATATAATTAAACAATATTTATTTTACTTATAAATTAAAAAACAAATTCAAAATATCAAATATACAAGAAAATAAATAACAAACTTCCAAGTTTACACACTTTTGAGAGTGTCACTCTGCCCAAACCGTCATCTCTGGGATGAGCTGATAGGTCCACCTCCCCCCGTCGCGACGTTTGCCATCTGCTCTGCCATTCACCTAATAGCCACTCGTGTGCCTCTTGCCTGGAAGCGCCACTCCGCATAAGGCTGAGAGCCTTGATCTCCAGATCAATCGGCAGGCCTGCCAGCGCTAGCGCCGCGTCTTCGGATATGGTCCTGAATCCTCTAATGAGCCTGAGGGCCATTGACCGTAGCACCGAACGAGCTCCTTTCAGGTATGAGACCCTGCAAGTGGCATTGCTCCAGACTGGTGCAGCGTATAGCAGCGAAGCCTTTGCTACTGACACCAGCAGTTTCCTGGCCGGGTGTCTTGGGCCTCCGACGTTGGGCATGCCTCGCCAACGAAGAGGATGTGACTGCTGTCTTCTTGCTGGCGTAGCTCGCGTGGTCCTTGAACGATAGTCTGCGATCTATCATTACCCCCAGGTACTTTAGGGACTCTTGAGAGGTCCCCTGAGTACCCTTGACGGAGACCAGCATGTTCTCCATCTTTTCCTGCTGCTTAGTAGGACTGCTTCGGTCTTGTGAGCCGCTATTGCTAGCCCGGCTGTGATTGCCACATCGTCAGCGAAACAGCTGCCTCGACGACTTCAAGTTCTGTGACGCACGGGAAATCTGGGGCAGGGGCTGCCTCTGTTGGCCTCCATAGTGTGGTCTGCTTTGGAAATAGCTCCCCTAGTATGTTAGCCAGGACCCCCGGGTCGGATGGGGTTGCCGCTCCTCCTCCTCCTTAGCTGCTTGGTAACAAGCTTGTAGGCGAGGCCCCAGATATCGCTGTCTACGCCATCCTGCAGCTCCTTAAACGACCGCGCTTTGGCAGCCGCGATGCCGTGCTTGAACTCTAGGCGTTTGCTTCTGAAAGCCTCCAAGAGTTCCGCGTGGTGGAGTACTGCCTCTGGCTCGTTGCGCCGTTCTCTTAGCCCTGAGGCAATCAGACCGTAGTTGGCTTAGGGAGGCACTCCACCAGTAAACGGGTGGTTTGCGCTGTGCCTTATTTTTTCCTTGGCATGATTGCGTCGCAGATTCTTCCCAGCATATTCATGAGGCCTGCCGCCATACTCTCTGCGTCCACATTTGGGATTTCCAGGGGAATTGATCTGATAGGCCAGCATGGCCTCATCGATCTTCCTGGTGTCCCATGTTTTCCCGGCTGTTCTACTCTGCCGTCTTTTGGGCATGTCCCTCCGGGGAGAAGGAGATCAGGGCATGGTCGCTCAGCGTCATGACGTCATGGACCATCCAGTTGTTGTTGTCTACTAGCCCTCTGCTGACAAAGGTAACGTCAATAAAGGACGTACCCCTATCGTTGTTAAACGTCGGCTTCCGTCCATCGTTCAGCAGTATGAGGTCCAGCATTCGCATGGCGTCAATCACAGCTCGTCCTCTGGTGTTGGATGTCCTGCTGCCCCATTCCACTGTCCAGGCATTAAAGTCGCCGGTATGACCTTCGGGCTTGCCCTCTCGCATGGTCCACGAGCGCCTCCAGAAGCC
CTCGAA'..b'ACCAACCAACAAAACAAAACGTCTTCGATTTGCGTTGGAATATGTTAAGAAGCCTCTTGACTTTTGGTTTAATATTTTATGGACTGATGAGTCTGCATTTCAGTACCAGGGGTCATACAGCAAGCATTTTATGCATTTGAAAAATAATCAAAAGCATTTGGCAGCCCAACCAACCAATAGATTTGGTGGTGGGGGCACAGTCATGTTTTGGGGATGTCTTTCCTATTATGGATTCGGAGACTTGGTACCGATAGAAGGTACTTTAAATCAGAACGGATACCTTCTTATCTTAAACAACCATGCTTTTACGTCTGGAAATAGACTTTTTCCAACTACTGAATGGATTCTTCAGCAGGACAATGCTCCATGCCATAAGGGTAGGATACCAACAAAATTTTTAAACGACCTTAATCTGGCGGTTCTTCCGTGGCCCCCCCCCAAAGCCCAGACCTTAATATCATTGAAAACGTTTGGGCTTTTATTAAAACCAACGAACTATTGATAAAAATAGAAAACGAGAGGGAGCCATCATTGAAATAGCGGAGATTTGGTCCAAATTGACATTAGAATTTGCACAAACTTTGGTAAGGTCAATACCAAAAGACTTCAAGCAGTTATTGATGCCAAAGGTGGTGTTACAAAATATTAGTATTGTATTTATATAAAATAAAGAAATTCTTATGTTGAAATTAGATGTTAAGCTGAAATTTACTAAATTAAGTTGAGTGAAAATACTTTTGAAGCGCAATAAACATGTGAAAATACTATTGACAACTTGCATGCATATTTTCTTTTGCTTTTAAGCTTTGTACTATGAACCGTTATCTTTCGTATTTCTTTTCGACTACCTTCTGCATAGATCAAGCTAAGCGATAAGAACTATTTCAGGCAAATCGGACAACAACAAGAAGAAATATAACAAAAGAAGTTGAAGTTTGCAAATATTGTGCGTTGTGAAAATACTTTGACCACCTCTGTATATAGTTTGCAGGAGCACGGTATTGAACTTAAATGTACTGATGAGGAAATCAAACGCTACATTGGCATTTTATTGTACTTTGGTGTTTTAAAACTACCGCAATTCAGAATGGCGTGGTCAAAGTATTTAAAGATTACCGCAATAACTGATTCAATGCCGCGTGGGAGATTTAAAAAAATAAAACAATGCTTACATTTCAACGACAACGCCAAACAATTAAAAAAAGGGGATTGCAACTATGATAAACTCTACAAGATCCGCCCTTTGTTCAGAATTCTCAAAGAAAATTTTGGAAAAACTAACGCAGGAAGAGCATCAAAGTGTCGATGAGCAAATAATTGCATTCAAAGGTACGTTTTAATTTTCTTTTAAATTAGCTTTATTTTTTAATAATTGCTTTTGTTGCAGGTCGATCCACGCTTAGGCAATATAATCCAAACCTCATAAATGGGGTCTTAAAATGTTTACGCGGGCTGGAATATCTGGATTAGTTTATGATTTTATTGCTATATGTTGGAGAAGGCACTTCTCCTTCTTATGGCTTGGGAATATCATCTTATGTTGTCTTATATTTGGCAGAAAGTCTTCCCAAAGACAAAATTTTAAACTGTATTTTGATAATTGGTTTACGTCTGTAATCCTTCTGATTTCGTTGAAGGAAATAGGAATCTTTGCAACAGGTACCTGTACGTATGATAAAGTTGAACATTGGGTAGTTTTTTGGAGAAAGAGGACGTTGCAGACTGTGCAAAATTGCAACACCGATGACCAAATGCCTTACATGCAAAGTCCATCTGTGCTGCAATAACAATAAAAACTGTTTTTTGTCATACCACACTTAAATTGTCATTATAAAGAAAAATATTTCATATTCTGTGATTTATAAAAAAAAAACAATGCTTACACATCACTACTGCCCGACGTTGCTCACAAGAAAACTTTTGCTACCGCCCAAACTAATGGGCGTGGCATACCTAAAATTTTGCTAAATTTTTTCTAAAATAAATGTA
AAACATTAATGATAAAACAAAATTTCACGGGTAAAAAAGTTGGGCGCGAAAGGGTTAAAAGACTTCTGTAGCATACTTTGGAATCCCAGAATCTCACATGCGAATAATGGAAAGTTTTGATCTAAGCTCAGATCATTCTCTAATAATAGTGACATACAGTACAGTAGCTCATATATTGCAAATACAGATATCAATGCATTTAAAAGTTATCTGGAAACAGCTATCAGCCTGGATATCTCGCTAAAATCAGGAGAGGAGAGCTACTGTGGAGCTACTCACAAACAAGATCCATAAAGCAAGCTATATATGTACGAAGCTACCAGCCAGAAACTCACAATCAAATCAGCTCTATCTCTCAGCTGAAACTCCGACAACAAATACAACACAAGAGAAATTTGCGTAAGAGATGGCAAGAAACTCTCTACCCTGCCGACAAAAGATCGTATAACAAGGCTGCATCTGATCGCAGAAAACTACTGTCAACTTTAAGAAATTAATCTCTCGCTGAATATCTTAGAAATCTAGATCCACATTCTTGTAACCACGAACATAATTTATGGAGTAACCAAATATCTCAAGCGACCTGCAAAAGAAACACAGTAGTCCGAAACTCTAATGGCGAATGGCGTAGATCTGATGATGAACAAGCCAAAGCATTTGCTTAACACCTGCACTCTGTATTTCAGCCAAATGATATTGATAACCCGCAAACAGAAAGGGAAGTAGATAACTTTTTTCGAGTCACCGCCAAATGAGCTTACCCATTCGTAAAATCAGTATTAATGAAGTTTCATCAGAAATGGCTAAAAAGTAAAAAGGCTCCAGATTGGGACAAAATAGATGGCATAGCCTTGAAAAAAAAAATGGGCGGTTAGAGTTGACCACTTCCCAAGCCAATGGAAATGTGCAGAAATTATAGGAATCCTTAAACCAAACAAGGCAGAAAATGAAGTGACATTGTACCGTCCCATTAGTTTGTTGTCAATATTTTCTAAAGTATTTAAAAAAAATAATTTTAAAGAGAATGTTGTCAATCTTGGAAGAATTGCTATCATACCCAAACACCAGTTTGGATTCGAAGAGGCCACGGAACCCCTGAGCAATGTCACAGGATTAAAAAATGAAATTTCGTCAGCATTTGAGAGCAAAAAATACTGCACTGCTAAATTTCTTGACGTTTAACAAGCGTTTGATCGAGTCTGGCATGATGGCTTATCATATAAAACCATATTTTATATTATTAAAGTCATACTTAACCAATAGACAATTTTATTTGCAACAAAAAAATGAATACTCGCCCTTGCACTTTATAAAAGCTGGAGTCCCACAAGGAAGAGTCTTAGTAACTGTCTTATACACCCTGTAACGGCAGATATGCCGGTAACAAATACCGCCTGTAACAAATGATACAGCTATATTAGCTACAAGCTCATCTAAAGAGGAAGCCTCACAACTCCTGCAAGCAGAGCTACGCCTTATTGAAAGCTGGTTTCTTATTTGGAAAATTAAAGTCAACGCCCTGAAATCTGCGCAAATAACTTTTGCATTAAGAAGAGGTAACTGCCCAGAAGTGTCATTTAATGGATCAGCAATCCCACAAAGTATCGATCGCAGACTAACGTGGAAACACATAAACATAAAAGCAAAGCGCCAGCAACTAAATCAAAAAAGTTTGAAGATGACCTGGTTGCTTGGCCGAAAATCTGCAACCACTCGGAAAATAAAGTCCGTTTATACAAAGCTATACTAAAGCCCATGTGGACTTATGACATACAGCTTTGGGGTACTGCCAGCAACTCAAATATTGAGATTCTACAATGCTACCAATCAAAAAATATTAAGACAAATTGTTAATGCTCCATTTTATATTTCAAATGCAAGTATCTATAAAGACTTAGGAATCCCTTATGTTAAAGAAGAAATAGCAAAACATATTAAAAATATATAGACAGACCAAGAACACATGAAAATAACTTAGCCTTAAATTTGGTAAATAA\tR
C:i:105426\n'
b
diff -r 000000000000 -r 60573349e9ae test-data/out_reads.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_reads.csv Wed Nov 11 21:54:51 2020 +0000
b
@@ -0,0 +1,59 @@
+AssembledSegmentId,EdgeCount,OrientedReadCount,OrientedReadId,VertexCount,EdgeCount
+0,478,29,1-0,138,109
+0,478,29,2-1,137,115
+0,478,29,3-1,85,69
+0,478,29,6-1,146,114
+0,478,29,8-1,161,130
+0,478,29,9-1,167,141
+0,478,29,10-1,223,196
+0,478,29,11-0,231,188
+0,478,29,13-0,292,229
+0,478,29,15-1,274,246
+0,478,29,16-1,262,213
+0,478,29,17-1,305,255
+0,478,29,19-0,139,103
+0,478,29,22-0,232,172
+0,478,29,23-0,360,330
+0,478,29,24-1,282,233
+0,478,29,26-0,346,301
+0,478,29,27-0,147,100
+0,478,29,28-0,245,230
+0,478,29,29-0,220,198
+0,478,29,30-1,179,149
+0,478,29,31-0,166,140
+0,478,29,33-1,140,126
+0,478,29,36-1,105,91
+0,478,29,38-0,90,82
+0,478,29,39-1,68,62
+0,478,29,40-1,85,76
+0,478,29,41-1,87,81
+0,478,29,42-1,77,69
+1,478,29,1-1,138,109
+1,478,29,2-0,137,115
+1,478,29,3-0,85,69
+1,478,29,6-0,146,114
+1,478,29,8-0,161,130
+1,478,29,9-0,167,141
+1,478,29,10-0,223,196
+1,478,29,11-1,231,188
+1,478,29,13-1,292,229
+1,478,29,15-0,274,246
+1,478,29,16-0,262,213
+1,478,29,17-0,305,255
+1,478,29,19-1,139,103
+1,478,29,22-1,232,172
+1,478,29,23-1,360,330
+1,478,29,24-0,282,233
+1,478,29,26-1,346,301
+1,478,29,27-1,147,100
+1,478,29,28-1,245,230
+1,478,29,29-1,220,198
+1,478,29,30-0,179,149
+1,478,29,31-1,166,140
+1,478,29,33-0,140,126
+1,478,29,36-0,105,91
+1,478,29,38-1,90,82
+1,478,29,39-0,68,62
+1,478,29,40-0,85,76
+1,478,29,41-0,87,81
+1,478,29,42-0,77,69