Repository 'phyloseq_plot_ordination'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/phyloseq_plot_ordination

Changeset 2:dfe800a3faaf (2024-03-16)
Previous changeset 1:92e77800ef2c (2024-02-09) Next changeset 3:c35e0075c7a2 (2024-12-03)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
modified:
macros.xml
phyloseq_from_dada2.R
test-data/output.phyloseq
test-data/sequence_table.dada2_sequencetable
added:
phyloseq_from_biom.R
test-data/biom-refseq.fasta
test-data/biom-tree.phy
test-data/rich_dense_otu_table.biom
test-data/rich_dense_otu_table.biom2
test-data/sample_data.tabular
b
diff -r 92e77800ef2c -r dfe800a3faaf macros.xml
--- a/macros.xml Fri Feb 09 21:42:09 2024 +0000
+++ b/macros.xml Sat Mar 16 07:56:05 2024 +0000
b
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.38.0</token>
+    <token name="@TOOL_VERSION@">1.46.0</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">21.01</token>
     <xml name="bio_tools">
@@ -10,8 +10,8 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">bioconductor-phyloseq</requirement>
-            <requirement type="package" version="1.7.1">r-optparse</requirement>
-            <requirement type="package" version="1.3.1">r-tidyverse</requirement>
+            <requirement type="package" version="1.7.3">r-optparse</requirement>
+            <requirement type="package" version="2.0.0">r-tidyverse</requirement>
         </requirements>
     </xml>
     <xml name="phyloseq_input">
b
diff -r 92e77800ef2c -r dfe800a3faaf phyloseq_from_biom.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_from_biom.R Sat Mar 16 07:56:05 2024 +0000
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+suppressPackageStartupMessages(library("tidyverse"))
+
+option_list <- list(
+    make_option(c("--BIOMfilename"), action = "store", dest = "biom", help = "Input BIOM file"),
+    make_option(c("--treefilename"), action = "store", dest = "tree", default = NULL, help = "Input Tree newick/nexus file"),
+    make_option(c("--parseFunction"), action = "store", dest = "parsefoo", default = "parse_taxonomy_default", help = "Parse function parse_taxonomy_default/read_tree_greengenes"),
+    make_option(c("--refseqfilename"), action = "store", dest = "sequences", default = NULL, help = "Input Sequence fasta file"),
+    make_option(c("--output"), action = "store", dest = "output", help = "RDS output")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+
+parsefoo <- get(opt$parsefoo)
+phyloseq_obj <- import_biom(
+    BIOMfilename = opt$biom,
+    treefilename = opt$tree,
+    refseqfilename = opt$sequences,
+    parseFunction = parsefoo
+)
+
+print(phyloseq_obj)
+
+# save R object to file
+saveRDS(phyloseq_obj, file = opt$output, compress = TRUE)
b
diff -r 92e77800ef2c -r dfe800a3faaf phyloseq_from_dada2.R
--- a/phyloseq_from_dada2.R Fri Feb 09 21:42:09 2024 +0000
+++ b/phyloseq_from_dada2.R Sat Mar 16 07:56:05 2024 +0000
b
@@ -7,6 +7,7 @@
 option_list <- list(
     make_option(c("--sequence_table"), action = "store", dest = "sequence_table", help = "Input sequence table"),
     make_option(c("--taxonomy_table"), action = "store", dest = "taxonomy_table", help = "Input taxonomy table"),
+    make_option(c("--sample_table"), action = "store", default = NULL, dest = "sample_table", help = "Input sample table"),
     make_option(c("--output"), action = "store", dest = "output", help = "RDS output")
 )
 
@@ -15,20 +16,40 @@
 opt <- args$options
 # The input sequence_table is an integer matrix
 # stored as tabular (rows = samples, columns = ASVs).
-seq_table_numeric_matrix <- data.matrix(read.table(opt$sequence_table, sep = "\t"))
+seq_table_numeric_matrix <- data.matrix(read.table(opt$sequence_table, header = T, sep = "\t", row.names = 1, check.names = FALSE))
 # The input taxonomy_table is a table containing
 # the assigned taxonomies exceeding the minBoot
 # level of bootstrapping confidence. Rows correspond
 # to sequences, columns to taxonomic levels. NA
 # indicates that the sequence was not consistently
 # classified at that level at the minBoot threshold.
-tax_table_matrix <- as.matrix(read.table(opt$taxonomy_table, header = FALSE, sep = "\t"))
+tax_table_matrix <- as.matrix(read.table(opt$taxonomy_table, header = T, sep = "\t", row.names = 1, check.names = FALSE))
 # Construct a tax_table object.  The rownames of
 # tax_tab must match the OTU names (taxa_names)
 # of the otu_table defined below.
 tax_tab <- tax_table(tax_table_matrix)
+
 # Construct an otu_table object.
 otu_tab <- otu_table(seq_table_numeric_matrix, taxa_are_rows = TRUE)
+
 # Construct a phyloseq object.
 phyloseq_obj <- phyloseq(otu_tab, tax_tab)
+if (!is.null(opt$sample_table)) {
+    sample_tab <- sample_data(
+        read.table(opt$sample_table, header = T, sep = "\t", row.names = 1, check.names = FALSE)
+    )
+    phyloseq_obj <- merge_phyloseq(phyloseq_obj, sample_tab)
+}
+
+# use short names for our ASVs and save the ASV sequences
+# refseq slot of the phyloseq object as described in
+# https://benjjneb.github.io/dada2/tutorial.html
+dna <- Biostrings::DNAStringSet(taxa_names(phyloseq_obj))
+names(dna) <- taxa_names(phyloseq_obj)
+phyloseq_obj <- merge_phyloseq(phyloseq_obj, dna)
+taxa_names(phyloseq_obj) <- paste0("ASV", seq(ntaxa(phyloseq_obj)))
+
+print(phyloseq_obj)
+
+# save R object to file
 saveRDS(phyloseq_obj, file = opt$output, compress = TRUE)
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/biom-refseq.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/biom-refseq.fasta Sat Mar 16 07:56:05 2024 +0000
b
@@ -0,0 +1,29 @@
+>GG_OTU_1
+AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCA
+TGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCCGTGTAGCGGTG
+AAATGCGTAGAGATGGGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGGCTTTAACTGACGCTGAGGCACGAAAGCGTG
+GGTAGCAAACAGGATTAGATACCCTGGTAGTCCACGCTGTAAACGATGATTACTAGGTGTGGGGGTCTGACCCCTTCCGT
+GCCGGAGTTAACAC
+>GG_OTU_2
+TACGTAGGGAGCAAGCGTTATCCGGATTTATTGGGTGTAAAGGGTGCGTAGACGGGAGAACAAGTTAGTTGTGAAAGCCC
+TCGGCTTAACTGAGGAACTGCAACTAAAACTATTTTTCTTGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTG
+AAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGTAACTGACGTTGAGGCACGAAAGTGTG
+GGGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGGATACTAGGTGTAGGAGATGATTTCATCATCT
+GTGCCGAAAGCAAACGCAATAAGTATCCCACCTGGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGATTGACGGGGCCCG
+CACAAGCAGTGGAGTATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGGCTTGACATA
+>GG_OTU_3
+TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATCACAAGTCAGAAGTGAAAAATC
+CGGGGGCTCCAACCCCGGAACTGCTTTTGAAACTGTGGAGCTGGAGTGCAGGAGAGGTAAGCGGAATTCCTAGTGTAGCG
+GTAGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGC
+GTGGGGAGC
+>GG_OTU_4
+TACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGTGCGGCAAGTCTGATGTGAAAGCCC
+GGGGCTCAACCCCGGTACTGCATTGGAAACTGTCGTACTAGAGTGTCGGAGGGGTAAGCGGAATTCCTAGTGTAGCGGTG
+AAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATAACTGACGCTGAGGCTCGAAAGCGTG
+GGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAATACTAGGTGTTGGGAAGCATTGCTTCTCGGT
+GCCGTCGCAAACGCAGTAAGTATTCCACCTGGGGGATACGTTTCGACAAGAATAGAAACTACAAAAGGAATTAGGACGGG
+GACCCGCACAAGCGGTGAGCATGTGGTTAATCGAAGCAACGCGAAGAACCTTA
+>GG_OTU_5
+AACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAGACAAGTTGGAAGTGAAACCATG
+GGCTCAACCCATGAATTGCTTTCAAAACTGTTTTTCTTGAGTTAGTGCAGAGGTAGATGGAATTCCCGGTGTAGCGGTGG
+AATGCGTAGATATCGGGA
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/biom-tree.phy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/biom-tree.phy Sat Mar 16 07:56:05 2024 +0000
b
@@ -0,0 +1,1 @@
+(((GG_OTU_1:0.00892,GG_OTU_2:0.01408)1.000.2:0.12196,GG_OTU_3:0.16022)0.995.2:0.01869,(GG_OTU_4:0.08976,GG_OTU_5:0.0665)0.766:0.09714)0.764.3;
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/output.phyloseq
b
Binary file test-data/output.phyloseq has changed
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/rich_dense_otu_table.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rich_dense_otu_table.biom Sat Mar 16 07:56:05 2024 +0000
[
@@ -0,0 +1,56 @@
+{
+     "id":null,
+     "format": "Biological Observation Matrix 1.0.0-dev",
+     "format_url": "http://biom-format.org",
+     "type": "OTU table",
+     "generated_by": "QIIME revision XYZ",
+     "date": "2011-12-19T19:00:00",  
+     "rows":[
+        {"id":"GG_OTU_1", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}},
+        {"id":"GG_OTU_2", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobacteria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichospermum", "s__"]}},
+        {"id":"GG_OTU_3", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchaeota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "g__Methanosarcina", "s__"]}},
+        {"id":"GG_OTU_4", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaerobium", "s__Halanaerobiumsaccharolyticum"]}},
+        {"id":"GG_OTU_5", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}}
+        ],  
+     "columns":[
+        {"id":"Sample1", "metadata":{
+                                 "BarcodeSequence":"CGCTTATCGAGA",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"gut",
+                                 "Description":"human gut"}},
+        {"id":"Sample2", "metadata":{
+                                 "BarcodeSequence":"CATACCAGTAGC",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"gut",
+                                 "Description":"human gut"}},
+        {"id":"Sample3", "metadata":{
+                                 "BarcodeSequence":"CTCTCTACCTGT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"gut",
+                                 "Description":"human gut"}},
+        {"id":"Sample4", "metadata":{
+                                 "BarcodeSequence":"CTCTCGGCCTGT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"skin",
+                                 "Description":"human skin"}},
+        {"id":"Sample5", "metadata":{
+                                 "BarcodeSequence":"CTCTCTACCAAT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"skin",
+                                 "Description":"human skin"}},
+        {"id":"Sample6", "metadata":{
+                                 "BarcodeSequence":"CTAACTACCAAT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"skin",
+                                 "Description":"human skin"}}
+                ],
+     "matrix_type": "dense",
+     "matrix_element_type": "int",
+     "shape": [5,6],
+     "data":  [[0,0,1,0,0,0], 
+               [5,1,0,2,3,1],
+               [0,0,1,4,2,0],
+               [2,1,1,0,0,1],
+               [0,1,1,0,0,0]]
+    }
+
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/rich_dense_otu_table.biom2
b
Binary file test-data/rich_dense_otu_table.biom2 has changed
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/sample_data.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_data.tabular Sat Mar 16 07:56:05 2024 +0000
b
@@ -0,0 +1,3 @@
+ Property Number
+SRR14190457 Early 1
+SRR14190458 Late 2
\ No newline at end of file
b
diff -r 92e77800ef2c -r dfe800a3faaf test-data/sequence_table.dada2_sequencetable
--- a/test-data/sequence_table.dada2_sequencetable Fri Feb 09 21:42:09 2024 +0000
+++ b/test-data/sequence_table.dada2_sequencetable Sat Mar 16 07:56:05 2024 +0000
b
b'@@ -1,65 +1,65 @@\n-\tSRR14190457\n-GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t178\n-GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGGATTAGAAACCCCCGTAGTCC\t136\n-GTGTCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\t129\n-GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCCTGCTAAGCTGCAACTGACATTGAGGCTCGAAAGTGTGGGTATCAAACAGGATTAGATACCCCCGTAGTCC\t128\n-GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTGTGTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTATGTAACTAGAGTGTCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\t110\n-GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\t104\n-GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCCGTAGTCC\t97\n-GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTTGGAACTGTCAGGCTAGAGTGTCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACGATGACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t90\n-GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGCAGGCGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAGAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\t88\n-GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t86\n-GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t84\n-GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAAATAAGTCTAATGTGAAAGCCCTCGGCTTAACCGAGGAACTGCATCGGAAACTGTTTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t83\n-GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTGCTTAGGTCTGATGTGAAAGCCTTCGGCTTAACCGAAGAAGTGCATCGGAAACCGGGCGACTTGAGTGCAGAAGAGGACAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTGTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCAGTAGTCC\t71\n-GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAGTGGCAAGTCTGATGTGAAAACCC'..b'CAGTGGCGAAGGCGGATAACTGGACGGCAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\t28\t71\n+GTGTCAGCAGCCGCGGTAAAACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAAGCAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGATATTGGGAGGAACACCAGTGGCGAAGGCGCCTTTCTGGACTGTGTCTGACGCTGAGATGCGAAAGCCAGGGTAGCGAACGGGATTAGATACCCCCGTAGTCC\t28\t71\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGATGGACAAGTCTGATGTGAAAGGCTGGGGCTCAACCCCGGGACTGCATTGGAAACTGCCCGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\t27\t83\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGCTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCGATGGCGAAGGCAGGTCTCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCCGTAGTCC\t27\t84\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCTGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCATAACTTGAGTGCTGTAGGGGTAACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTTACTGGGCAGTTACTGACGCTGAGGAGCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCAGTAGTCC\t26\t86\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTAGTAGTCC\t26\t88\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t25\t90\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\t24\t97\n+GTGTCAGCAGCCGCGGTAATACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCAGCAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTACTGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\t22\t104\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\t22\t110\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\t22\t128\n+GTGTCAGCAGCCGCGGTGATACGTAGGGTGCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGCTCGTAGGTGGTTGATCGCGTCGGAAGTGTAATCTTGGGGCTTAACCCTGAGCGTGCTTTCGATACGGGTTGACTTGAGGAAGGTAGGGGAGAATGGAATTCCTGGTGGAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAGTGGCGAAGGCGGTTCTCTGGGCCTTTCCTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGCTTAGATACCCCTGTAGTCC\t16\t129\n+GTGTCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGTCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGCGGTCTTGAGTGAGCGAGAAGTACGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCGTACCGGCGCTCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGGATTAGATACCCCCGTAGTCC\t15\t136\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGGCGTATAAGTCAGTGGTGAAATCCTGGAGCTTAACTCCAGAACTGCCATTGATACTATATGTCTTGAATATGGTGGAGGTAAGCGGAATATGTCATGTAGCGGTGAAATGCATAGATATGACATAGAACACCTATTGCGAAGGCAGCTTACTACGCCTATATTGACGCTGAGGCACGAAAGCGTGGGGATCAAACAGGATTAGAAACCCGAGTAGTCC\t11\t178\n'