Repository 'repenrich'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/repenrich

Changeset 13:530626b0757c (2024-04-02)
Previous changeset 12:89e05f831259 (2024-03-18) Next changeset 14:bf866bedd4b4 (2024-04-20)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
modified:
RepEnrich.py
RepEnrich_setup.py
edgeR_repenrich.R
edger-repenrich.xml
macros.xml
repenrich.xml
test-data/Normalized_counts_file.tab
test-data/edgeR_plots.pdf
test-data/edgeR_result_file.tab
added:
test-data/chrY-1-500k.fa
test-data/chrY-1-500k.fa.out
test-data/chrY-500k.R1.fastqsanger.gz
test-data/chrY-500k.R2.fastqsanger.gz
test-data/chrY_paired_class_fraction_counts.tab
test-data/chrY_paired_family_fraction_counts.tab
test-data/chrY_paired_fraction_counts.tab
test-data/chrY_single_class_fraction_counts.tab
test-data/chrY_single_family_fraction_counts.tab
test-data/chrY_single_fraction_counts.tab
removed:
test-data/Samp-paired_class_fraction_counts.tab
test-data/Samp-paired_family_fraction_counts.tab
test-data/Samp-paired_fraction_counts.tab
test-data/Samp.fastq
test-data/Samp_L.fastq
test-data/Samp_R.fastq
test-data/Samp_class_fraction_counts.tabular
test-data/Samp_family_fraction_counts.tabular
test-data/Samp_fraction_counts.tabular
test-data/aligned_353.tab
test-data/aligned_354.tab
test-data/aligned_355.tab
test-data/aligned_356.tab
test-data/aligned_reads.tab
b
diff -r 89e05f831259 -r 530626b0757c RepEnrich.py
--- a/RepEnrich.py Mon Mar 18 09:39:44 2024 +0000
+++ b/RepEnrich.py Tue Apr 02 21:16:37 2024 +0000
[
b'@@ -4,8 +4,8 @@\n import shlex\n import subprocess\n import sys\n-\n-import numpy\n+from collections import defaultdict\n+from concurrent.futures import ProcessPoolExecutor\n \n \n parser = argparse.ArgumentParser(description=\'\'\'\n@@ -17,26 +17,13 @@\n                     help=\'RepeatMasker.org annotation file for your\\\n                           organism. The file may be downloaded from\\\n                           RepeatMasker.org. E.g. hg19_repeatmasker.txt\')\n-parser.add_argument(\'--outputfolder\', action=\'store\', metavar=\'outputfolder\',\n-                    help=\'Folder that will contain results. Should be the\\\n-                          same as the one used for RepEnrich_setup.\\\n-                          Example: ./outputfolder\')\n-parser.add_argument(\'--outputprefix\', action=\'store\', metavar=\'outputprefix\',\n-                    help=\'Prefix name for Repenrich output files.\')\n-parser.add_argument(\'--setup_folder\', action=\'store\', metavar=\'setup_folder\',\n-                    help=\'Folder produced by RepEnrich_setup which contains\\\n-                    repeat element pseudogenomes.\')\n+parser.add_argument(\'--alignment_bam\', action=\'store\', metavar=\'alignment_bam\',\n+                    help=\'Bam alignments of unique mapper reads.\')\n parser.add_argument(\'--fastqfile\', action=\'store\', metavar=\'fastqfile\',\n                     help=\'File of fastq reads mapping to multiple\\\n                           locations. 
Example: /data/multimap.fastq\')\n-parser.add_argument(\'--alignment_bam\', action=\'store\', metavar=\'alignment_bam\',\n-                    help=\'Bam alignments of unique mapper reads.\')\n-parser.add_argument(\'--pairedend\', action=\'store\', dest=\'pairedend\',\n-                    default=\'FALSE\',\n-                    help=\'Change to TRUE for paired-end fastq files.\\\n-                          Default FALSE\')\n parser.add_argument(\'--fastqfile2\', action=\'store\', dest=\'fastqfile2\',\n-                    metavar=\'fastqfile2\', default=\'none\',\n+                    metavar=\'fastqfile2\', default=\'\',\n                     help=\'fastqfile #2 when using paired-end option.\\\n                           Default none\')\n parser.add_argument(\'--cpus\', action=\'store\', dest=\'cpus\', metavar=\'cpus\',\n@@ -48,18 +35,16 @@\n \n # parameters\n annotation_file = args.annotation_file\n-outputfolder = args.outputfolder\n-outputfile_prefix = args.outputprefix\n-setup_folder = args.setup_folder\n-repeat_bed = os.path.join(setup_folder, \'repnames.bed\')\n unique_mapper_bam = args.alignment_bam\n fastqfile_1 = args.fastqfile\n fastqfile_2 = args.fastqfile2\n cpus = args.cpus\n-b_opt = "-k1 -p 1 --quiet"\n # Change if simple repeats are differently annotated in your organism\n simple_repeat = "Simple_repeat"\n-paired_end = args.pairedend\n+if args.fastqfile2:\n+    paired_end = True\n+else:\n+    paired_end = False\n \n # check that the programs we need are available\n try:\n@@ -73,260 +58,147 @@\n     print("Error: Bowtie or bedtools not loaded")\n     raise\n \n-# define a csv reader that reads space deliminated files\n-print(\'Preparing for analysis using RepEnrich...\')\n-csv.field_size_limit(sys.maxsize)\n \n-\n-def import_text(filename, separator):\n-    for line in csv.reader(open(filename), delimiter=separator,\n-                           skipinitialspace=True):\n-        if line:\n-            yield line\n+def starts_with_numerical(list):\n+  
  try:\n+        if len(list) == 0:\n+            return False\n+        int(list[0])\n+        return True\n+    except ValueError:\n+        return False\n \n \n-# build dictionaries to convert repclass and rep families\n-repeatclass, repeatfamily = {}, {}\n-repeats = import_text(annotation_file, \' \')\n-# skip three first lines of the iterator\n-for line in range(3):\n-    next(repeats)\n-for repeat in repeats:\n-    classfamily = []\n-    classfamily = repeat[10].split(\'/\')\n-    matching_repeat = repeat[9].translate(str.maketrans(\'()/\', \'___\'))\n-    repeatclass[matching_repeat] = classfamily[0]\n-    if len(classfamily) == 2:\n-        repeatfamily[matching_repeat] = classfamily[1]\n-    e'..b' mutimapper repeat reads\')\n \n-# building the total counts for repeat element enrichment...\n-for x in counts.keys():\n-    count = counts[x]\n-    x = x.strip(\',\').split(\',\')\n-    for i in x:\n-        reptotalcounts[rev_repeat_key[int(i)]] += int(count)\n-# building the fractional counts for repeat element enrichment...\n-for x in counts.keys():\n-    count = counts[x]\n-    x = x.strip(\',\')    .split(\',\')\n-    splits = len(x)\n-    for i in x:\n-        fractionalcounts[rev_repeat_key[int(i)]] += float(\n-            numpy.divide(float(count), float(splits)))\n-# building categorized table of repeat element enrichment...\n-repcounts = {}\n-repcounts[\'other\'] = 0\n-for key in counts.keys():\n-    key_list = key.strip(\',\').split(\',\')\n-    repname = \'\'\n+# Populate fractionalcounts\n+for key, count in counts.items():\n+    key_list = key.split(\',\')\n     for i in key_list:\n-        repname = os.path.join(repname, rev_repeat_key[int(i)])\n-    repcounts[repname] = counts[key]\n-# building the total counts for class enrichment...\n-for key in reptotalcounts.keys():\n-    classtotalcounts[repeatclass[key]] += reptotalcounts[key]\n-# building total counts for family enrichment...\n-for key in reptotalcounts.keys():\n-    
familytotalcounts[repeatfamily[key]] += reptotalcounts[key]\n-# building unique counts table\n-repcounts2 = {}\n-for rep in repeat_list:\n-    if "/" + rep in repcounts:\n-        repcounts2[rep] = repcounts["/" + rep]\n-    else:\n-        repcounts2[rep] = 0\n-# building the fractionalcounts counts for class enrichment...\n-for key in fractionalcounts.keys():\n-    classfractionalcounts[repeatclass[key]] += fractionalcounts[key]\n-# building fractional counts for family enrichment...\n-for key in fractionalcounts.keys():\n-    familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key]\n+        fractionalcounts[i] += count / len(key_list)\n \n-# print output to file of the categorized counts and total overlapping counts:\n-print(\'Writing final output...\')\n-with open(f"{os.path.join(outputfolder, outputfile_prefix)}_"\n-          f"class_fraction_counts.txt", \'w\') as fout:\n-    for key in sorted(classfractionalcounts.keys()):\n+# build repeat_ref for easy access to rep class and rep families\n+repeat_ref = defaultdict(dict)\n+repeats = import_text(annotation_file, \' \')\n+for repeat in repeats:\n+    repeat_name = repeat[9].translate(str.maketrans(\'()/\', \'___\'))\n+    try:\n+        repclass = repeat[10].split(\'/\')[0]\n+        repfamily = repeat[10].split(\'/\')[1]\n+    except IndexError:\n+        repclass, repfamily = repeat[10], repeat[10]\n+    repeat_ref[repeat_name][\'class\'] = repclass\n+    repeat_ref[repeat_name][\'family\'] = repfamily\n+\n+# Populate classfractionalcounts and familyfractionalcounts\n+for key, value in fractionalcounts.items():\n+    classfractionalcounts[repeat_ref[key][\'class\']] += value\n+    familyfractionalcounts[repeat_ref[key][\'family\']] += value\n+\n+# print class-, family- and fraction-repeats counts to files\n+with open("class_fraction_counts.tsv", \'w\') as fout:\n+    for key in sorted(classfractionalcounts):\n         fout.write(f"{key}\\t{classfractionalcounts[key]}\\n")\n \n-with 
open(f"{os.path.join(outputfolder, outputfile_prefix)}_"\n-          f"family_fraction_counts.txt", \'w\') as fout:\n-    for key in sorted(familyfractionalcounts.keys()):\n+with open("family_fraction_counts.tsv", \'w\') as fout:\n+    for key in sorted(familyfractionalcounts):\n         fout.write(f"{key}\\t{familyfractionalcounts[key]}\\n")\n \n-with open(f"{os.path.join(outputfolder, outputfile_prefix)}_"\n-          f"fraction_counts.txt", \'w\') as fout:\n-    for key in sorted(fractionalcounts.keys()):\n-        fout.write(f"{key}\\t{repeatclass[key]}\\t{repeatfamily[key]}\\t"\n-                   f"{int(fractionalcounts[key])}\\n")\n+with open("fraction_counts.tsv", \'w\') as fout:\n+    for key in sorted(fractionalcounts):\n+        fout.write(f"{key}\\t{repeat_ref[key][\'class\']}\\t"\n+                   f"{repeat_ref[key][\'family\']}\\t"\n+                   f"{fractionalcounts[key]}\\n")\n'
b
diff -r 89e05f831259 -r 530626b0757c RepEnrich_setup.py
--- a/RepEnrich_setup.py Mon Mar 18 09:39:44 2024 +0000
+++ b/RepEnrich_setup.py Tue Apr 02 21:16:37 2024 +0000
[
@@ -5,6 +5,9 @@
 import shlex
 import subprocess
 import sys
+from collections import defaultdict
+from concurrent.futures import ProcessPoolExecutor
+
 
 from Bio import SeqIO
 from Bio.Seq import Seq
@@ -22,10 +25,6 @@
 parser.add_argument('--genomefasta', action='store', metavar='genomefasta',
                     help='''Genome of interest in fasta format.\
                          Example: mm9.fa''')
-parser.add_argument('--setup_folder', action='store', metavar='setup_folder',
-                    help='''Folder that contains bowtie indexes of repeats and\
-                         repeat element psuedogenomes.\
-                         Example working/setup''')
 parser.add_argument('--gaplength', action='store', dest='gaplength',
                     metavar='gaplength', default='200', type=int,
                     help='''Length of the N-spacer in the\
@@ -36,6 +35,10 @@
                          repeat pseudogenomes. Flanking length should be set\
                          according to the length of your reads.\
                          Default 25, for 50 nt reads''')
+parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus',
+                    default="1", type=int,
+                    help='Number of CPUs. The more cpus the\
+                          faster RepEnrich performs. Default: "1"')
 args = parser.parse_args()
 
 # parameters from argsparse
@@ -43,7 +46,7 @@
 flankingl = args.flankinglength
 annotation_file = args.annotation_file
 genomefasta = args.genomefasta
-setup_folder = args.setup_folder
+cpus = args.cpus
 
 # check that the programs we need are available
 try:
@@ -54,56 +57,51 @@
     print("Error: Bowtie not available in the path")
     raise
 
-# Define a text importer
-csv.field_size_limit(sys.maxsize)
 
-
-def import_text(filename, separator):
-    for line in csv.reader(open(os.path.realpath(filename)),
-                           delimiter=separator, skipinitialspace=True):
-        if line:
-            yield line
+def starts_with_numerical(list):
+    try:
+        if len(list) == 0:
+            return False
+        int(list[0])
+        return True
+    except ValueError:
+        return False
 
 
-# Make a setup folder
-if not os.path.exists(setup_folder):
-    os.makedirs(setup_folder)
+# define a text importer for .out/.txt format of repbase
+def import_text(filename, separator):
+    csv.field_size_limit(sys.maxsize)
+    file = csv.reader(open(filename), delimiter=separator,
+                      skipinitialspace=True)
+    return [line for line in file if starts_with_numerical(line)]
+
+
 # load genome into dictionary and compute length
 g = SeqIO.to_dict(SeqIO.parse(genomefasta, "fasta"))
-idxgenome, lgenome, genome = {}, {}, {}
+genome = defaultdict(dict)
 
-for k, chr in enumerate(g.keys()):
-    genome[chr] = g[chr].seq
-    lgenome[chr] = len(genome[chr])
-    idxgenome[chr] = k
+for chr in g.keys():
+    genome[chr]['sequence'] = g[chr].seq
+    genome[chr]['length'] = len(g[chr].seq)
 
 # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter
-repeat_elements = []
-# these dictionaries will contain lists
-rep_chr, rep_start, rep_end = {}, {}, {}
-fin = import_text(annotation_file, ' ')
-with open(os.path.join(setup_folder, 'repnames.bed'), 'w') as fout:
-    for i in range(3):
-        next(fin)
-    for line in fin:
+repeat_elements = set()
+rep_coords = defaultdict(list)  # Merged dictionary for coordinates
+
+with open('repnames.bed', 'w') as fout:
+    f_in = import_text(annotation_file, ' ')
+    for line in f_in:
         repname = line[9].translate(str.maketrans('()/', '___'))
-        if repname not in repeat_elements:
-            repeat_elements.append(repname)
-        repchr = line[4]
-        repstart = line[5]
-        repend = line[6]
-        fout.write('\t'.join([repchr, repstart, repend, repname]) + '\n')
-        if repname in rep_chr:
-            rep_chr[repname].append(repchr)
-            rep_start[repname].append(repstart)
-            rep_end[repname].append(repend)
-        else:
-            rep_chr[repname] = [repchr]
-            rep_start[repname] = [repstart]
-            rep_end[repname] = [repend]
+        repeat_elements.add(repname)
+        repchr, repstart, repend = line[4], line[5], line[6]
+        fout.write(f"{repchr}\t{repstart}\t{repend}\t{repname}\n")
+        rep_coords[repname].extend([repchr, repstart, repend])
+# repeat_elements now contains the unique repeat names
+# rep_coords is a dictionary where keys are repeat names and values are flat
+# lists of consecutive (chromosome, start, end) triplets, one per instance
 
-# sort repeat_elements and print them in repgenomes_key.txt
-with open(os.path.join(setup_folder, 'repgenomes_key.txt'), 'w') as fout:
+# sort repeat_elements and print them in repeatIDs.txt
+with open('repeatIDs.txt', 'w') as fout:
     for i, repeat in enumerate(sorted(repeat_elements)):
         fout.write('\t'.join([repeat, str(i)]) + '\n')
 
@@ -111,24 +109,41 @@
 spacer = ''.join(['N' for i in range(gapl)])
 
 # generate metagenomes and save them to FASTA files for bowtie build
-for repname in rep_chr:
+for repname in rep_coords:
     metagenome = ''
-    for i, repeat in enumerate(rep_chr[repname]):
-        try:
-            chromosome = rep_chr[repname][i]
-            start = max(int(rep_start[repname][i]) - flankingl, 0)
-            end = min(int(rep_end[repname][i]) + flankingl,
-                      int(lgenome[chr])-1) + 1
-            metagenome = f"{metagenome}{spacer}{genome[chromosome][start:end]}"
-        except KeyError:
-            print("Unrecognised Chromosome: " + rep_chr[repname][i])
+    # iterating coordinate list by block of 3 (chr, start, end)
+    block = 3
+    for i in range(0, len(rep_coords[repname]) - block + 1, block):
+        batch = rep_coords[repname][i:i+block]
+        print(batch)
+        chromosome = batch[0]
+        start = max(int(batch[1]) - flankingl, 0)
+        end = min(int(batch[2]) + flankingl,
+                  int(genome[chromosome]['length'])-1) + 1
+        metagenome = (
+            f"{metagenome}{spacer}"
+            f"{genome[chromosome]['sequence'][start:end]}"
+            )
 
     # Create Fasta of repeat pseudogenome
-    fastafilename = f"{os.path.join(setup_folder, repname)}.fa"
+    fastafilename = f"{repname}.fa"
     record = SeqRecord(Seq(metagenome), id=repname, name='', description='')
     SeqIO.write(record, fastafilename, "fasta")
 
-    # Generate repeat pseudogenome bowtie index
-    bowtie_build_cmd = ["bowtie-build", "-f", fastafilename,
-                        os.path.join(setup_folder, repname)]
-    subprocess.run(bowtie_build_cmd, check=True)
+
+def bowtie_build(args):
+    """
+    Function to be executed in parallel by ProcessPoolExecutor.
+    """
+    try:
+        bowtie_base, fasta = args
+        command = shlex.split(f"bowtie-build -f {fasta} {bowtie_base}")
+        squash = subprocess.run(command, capture_output=True, text=True)
+        return squash.stdout
+    except Exception as e:
+        return str(e)
+
+
+args_list = [(name, f"{name}.fa") for name in rep_coords]
+with ProcessPoolExecutor(max_workers=cpus) as executor:
+    executor.map(bowtie_build, args_list)
b
diff -r 89e05f831259 -r 530626b0757c edgeR_repenrich.R
--- a/edgeR_repenrich.R Mon Mar 18 09:39:44 2024 +0000
+++ b/edgeR_repenrich.R Tue Apr 02 21:16:37 2024 +0000
[
@@ -19,7 +19,6 @@
 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
 
 # get options, using the spec as defined by the enclosed list.
-# we read the options from the default: commandArgs(TRUE).
 spec <- matrix(
     c(
         "quiet", "q", 0, "logical",
@@ -30,8 +29,6 @@
         "levelNameB", "B", 1, "character",
         "levelAfiles", "a", 1, "character",
         "levelBfiles", "b", 1, "character",
-        "alignmentA", "i", 1, "character",
-        "alignmentB", "j", 1, "character",
         "plots", "p", 1, "character"
     ),
     byrow = TRUE, ncol = 4
@@ -65,17 +62,7 @@
     counts <- cbind(counts, listB[[element]][, 4])
 }
 colnames(counts) <- c(names(listA[-1]), names(listB[-1]))
-
-# build aligned counts vector
-filesi <- fromJSON(opt$alignmentA, method = "C", unexpected.escape = "error")
-filesj <- fromJSON(opt$alignmentB, method = "C", unexpected.escape = "error")
-sizes <- c()
-for (file in filesi) {
-    sizes <- c(sizes, read.delim(file, header = TRUE)[1, 1])
-}
-for (file in filesj) {
-    sizes <- c(sizes, read.delim(file, header = TRUE)[1, 1])
-}
+sizes <- colSums(counts)
 
 # build a meta data object
 meta <- data.frame(
@@ -187,6 +174,3 @@
 results$log2FC <- format(results$log2FC, digits = 5)
 results$FDR <- format(results$FDR, digits = 5)
 write.table(results, opt$outfile, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
-
-cat("Session information:\n\n")
-sessionInfo()
b
diff -r 89e05f831259 -r 530626b0757c edger-repenrich.xml
--- a/edger-repenrich.xml Mon Mar 18 09:39:44 2024 +0000
+++ b/edger-repenrich.xml Tue Apr 02 21:16:37 2024 +0000
b
@@ -3,7 +3,7 @@
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements"/>
+    <expand macro="edgeR_requirements"/>
     <stdio>
         <regex match="Execution halted"
            source="both"
@@ -46,20 +46,6 @@
             $factorlevelsB.reverse()
             --levelBfiles '#echo json.dumps(factorlevelsB)#'
 
-            #set $alignedA = list()
-            #for file in $alignmentFiles_A:
-                $alignedA.append(str($file))
-            #end for
-            $alignedA.reverse()
-            --alignmentA '#echo json.dumps(alignedA)#' 
-
-            #set $alignedB = list()
-            #for file in $alignmentFiles_B:
-                $alignedB.append(str($file))
-            #end for
-            $alignedB.reverse()
-            --alignmentB '#echo json.dumps(alignedB)#'
-
             -o 'edger_out'
 
             -p '$plots'
@@ -83,7 +69,6 @@
                 </sanitizer>
             </param>
             <param name="countsFiles_A" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich" />
-            <param name="alignmentFiles_A" type="data" format="tabular" multiple="true" label="Number of aligned reads file(s)" help="files of total aligned reads generated by repenrich"/>
             <param name="factorLevel_B" type="text" value="FactorLevel2" label="Specify a factor level, typical values could be 'wildtype' or 'control'"
                    help="Only letters, numbers and underscores will be retained in this field">
                 <sanitizer>
@@ -91,7 +76,6 @@
                 </sanitizer>
             </param>
             <param name="countsFiles_B" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich tool" />
-            <param name="alignmentFiles_B" type="data" format="tabular" multiple="true" label="Number of aligned reads file(s)" help="files of total aligned reads generated by repenrich"/>
             <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
             label="Output normalized counts table" />
     </inputs>
@@ -111,10 +95,8 @@
             <param name="factorName" value="Genotype"/>
             <param name="factorLevel_A" value="Mutant"/>
             <param name="countsFiles_A" value="355_fraction_counts.tab,356_fraction_counts.tab"/>
-            <param name="alignmentFiles_A" value="aligned_355.tab,aligned_356.tab"/>
             <param name="factorLevel_B" value="Wildtype"/>
             <param name="countsFiles_B" value="353_fraction_counts.tab,354_fraction_counts.tab"/>
-            <param name="alignmentFiles_B" value="aligned_353.tab,aligned_354.tab"/>
             <param name="normCounts" value="True"/>
             <output name="counts_out" file="Normalized_counts_file.tab"/>
             <output name="plots" file="edgeR_plots.pdf"/>
@@ -164,11 +146,10 @@
 Gypsy6_I-int    LTR        Gypsy      7489
 ============== ========== ========== ==========
 
-Count tables must be
-generated for each sample individually. Here, edgeR_ is handling a single factor
-(genotype, age, treatment, etc) that effect your experiment. This factor has two
-levels/states (for instance, "wild-type" and "mutant".
-You need to select appropriate count table from your history for each factor level.
+Count tables must be generated for each sample individually. Here, edgeR_ is handling a
+single factor (genotype, age, treatment, etc.) that affects your experiment. This factor has
+two levels/states (for instance, "wild-type" and "mutant"). You need to select the appropriate
+count table from your history for each factor level.
 
 The following table gives some examples of factors and their levels:
 
@@ -189,14 +170,10 @@
 in above table, edgeR computes fold changes of 'Treated' samples against 'Untreated',
 i.e. the values correspond to up or down regulations of genes in Treated samples.
 
-*Number of aligned reads*:
-
-A file containing the number of reads aligned to transposons by repenrich must me provided
-to edger-repenrich. This file is a single-column tabular file containing a single value.
-
 **Output**
 
-edgeR_ generates a tabular file containing the different columns and results visualized in a PDF:
+edgeR_ generates a tabular file containing the different columns and results visualized in
+a PDF:
 
 ====== =============================================================================
 Column Description
@@ -212,7 +189,8 @@
 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
 ]]>
 
-**Note**: This edgeR_ wrapper was adapted from code available at https://github.com/nskvir/RepEnrich
+**Note**: This edgeR_ wrapper was adapted from code available at
+https://github.com/nskvir/RepEnrich
 
     </help>
     <citations>
b
diff -r 89e05f831259 -r 530626b0757c macros.xml
--- a/macros.xml Mon Mar 18 09:39:44 2024 +0000
+++ b/macros.xml Tue Apr 02 21:16:37 2024 +0000
b
@@ -1,18 +1,24 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.83</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@TOOL_VERSION@">2.31.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">23.0</token>
 
-    <xml name="requirements">
+    <xml name="repenrich_requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bedtools</requirement>
+            <requirement type="package" version="1.3.1">bowtie</requirement>
+            <requirement type="package" version="1.19.2">samtools</requirement>
+            <requirement type="package" version="1.83">biopython</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="edgeR_requirements">
         <requirements>
             <requirement type="package" version="4.0.2">bioconductor-edger</requirement>
             <requirement type="package" version="3.58.1">bioconductor-limma</requirement>
             <requirement type="package" version="1.20.4">r-getopt</requirement>
             <requirement type="package" version="0.2.21">r-rjson</requirement>
-            <requirement type="package" version="1.0.0">bowtie</requirement>
-            <requirement type="package" version="1.19.2">samtools</requirement>
-            <requirement type="package" version="2.23.0">bedtools</requirement>
-            <requirement type="package" version="@TOOL_VERSION@">biopython</requirement>
         </requirements>
     </xml>
+
 </macros>
b
diff -r 89e05f831259 -r 530626b0757c repenrich.xml
--- a/repenrich.xml Mon Mar 18 09:39:44 2024 +0000
+++ b/repenrich.xml Tue Apr 02 21:16:37 2024 +0000
[
b'@@ -3,84 +3,59 @@\n     <macros>\n         <import>macros.xml</import>\n     </macros>\n-    <expand macro="requirements"/>\n+    <expand macro="repenrich_requirements"/>\n     <stdio>\n         <exit_code range="1:" level="fatal" description="Tool exception" />\n     </stdio>\n     <command detect_errors="exit_code"><![CDATA[\n         #import re\n-        #set input_base = \'Sample\'\n-        #set baseReference = \'Genome\'\n-\n         ## uncompress fastq.gz or fastqsanger.gz if needed\n         #if $seq_method.seq_method_list == "single-read":\n             #if $seq_method.input_fastq.is_of_type("fastq.gz", "fastqsanger.gz"):\n-                gunzip < \'$seq_method.input_fastq\' > \'${input_base}.fastq\' &&\n+                gunzip < \'$seq_method.input_fastq\' > \'input.fastq\' &&\n             #else:\n-                ln -f -s \'$seq_method.input_fastq\' \'${input_base}.fastq\' &&\n+                ln -f -s \'$seq_method.input_fastq\' \'input.fastq\' &&\n             #end if\n         #elif $seq_method.seq_method_list == \'paired_collection\':\n             #if $seq_method.input_fastq.forward.is_of_type("fastq.gz", "fastqsanger.gz"):\n-                gunzip < \'$seq_method.input_fastq.forward\' > \'${input_base}.fastq\' &&\n-                gunzip < \'$seq_method.input_fastq.reverse\' > \'${input_base}_2.fastq\' &&\n+                gunzip < \'$seq_method.input_fastq.forward\' > \'input.fastq\' &&\n+                gunzip < \'$seq_method.input_fastq.reverse\' > \'input_2.fastq\' &&\n             #else:\n-                ln -f -s \'$seq_method.input_fastq.forward\' \'${input_base}.fastq\' &&\n-                ln -f -s \'$seq_method.input_fastq.reverse\' \'${input_base}_2.fastq\' &&\n+                ln -f -s \'$seq_method.input_fastq.forward\' \'input.fastq\' &&\n+                ln -f -s \'$seq_method.input_fastq.reverse\' \'input_2.fastq\' &&\n             #end if\n         #else:\n             #if $seq_method.input2_fastq.is_of_type("fastq.gz", 
"fastqsanger.gz"):\n-                gunzip < \'$seq_method.input_fastq\' > \'${input_base}.fastq\' &&\n-                gunzip < \'$seq_method.input2_fastq\' > \'${input_base}_2.fastq\' &&\n+                gunzip < \'$seq_method.input_fastq\' > \'input.fastq\' &&\n+                gunzip < \'$seq_method.input2_fastq\' > \'input_2.fastq\' &&\n             #else:\n-                ln -f -s \'$seq_method.input_fastq\' \'${input_base}.fastq\' &&\n-                ln -f -s \'$seq_method.input2_fastq\' \'${input_base}_2.fastq\' &&\n+                ln -f -s \'$seq_method.input_fastq\' \'input.fastq\' &&\n+                ln -f -s \'$seq_method.input2_fastq\' \'input_2.fastq\' &&\n             #end if\n         #end if\n-        ln -f -s \'$genome\' \'${baseReference}.fa\' &&\n-        bowtie-build \'$genome\' ${baseReference} &&\n+        ln -f -s \'$genome\' \'genome.fa\' &&\n+        bowtie-build \'$genome\' genome &&\n         python $__tool_directory__/RepEnrich_setup.py\n-            --annotation_file $repeatmasker\n-            --genomefasta ${baseReference}.fa\n-            --setup_folder setup_folder_${baseReference} &&\n-        #if $seq_method.seq_method_list == "single-read":\n-            bowtie $baseReference -p \\${GALAXY_SLOTS:-4} -t -m 1 -S --max ${input_base}_multimap.fastq ${input_base}.fastq ${input_base}_unique.sam 2>bowtie_alignments.txt &&\n-            TOTAL=\\$(grep \'reads processed:\' bowtie_alignments.txt | cut -d \' \' -f 4) &&\n-            NONALIGNED=\\$(grep \'reads that failed to align:\' bowtie_alignments.txt | cut -d \' \' -f 7) &&\n-            echo -e "# Total reads aligned to repeated sequences\\n" > bowtie_aligned.numb &&\n-            echo \\$((\\$TOTAL-\\$NONALIGNED)) >> bowtie_aligned.numb &&\n-        #else:\n-            bowtie $baseReference -p \\${GALAXY_SLOTS:-4} -t -m 1 -S --max ${input_base}_multimap.fastq -1 ${input_base}.fastq -2 ${input_base}_2.fastq ${input_base}_unique.sam 2>bowtie_alignments.txt &&\n-            
TOTAL=\\$(grep \'reads processed:\' bowtie_alignments.txt | cut -d \' \' -f 4) &&\n-            NONALIGNED=\\$(grep \'reads that failed to align:\' bowtie_alignments.txt | cut -d \' \' -f 7) &&\n-       '..b'overlap to RepeatMasker annotated genomic instances of each repetitive element subfamily. Reads mapping to multiple locations are separately mapped to repetitive element assemblies \xe2\x80\x93 referred to as repetitive element psuedogenomes \xe2\x80\x93 built from RepeatMasker annotated genomic instances of repetitive element subfamilies. RepEnrich then return tables of counts merged from both strategies, that can be further processed in statistical analysis for differential expression. For detailed information see the `original publication`_.\n+Reads are mapped to the genome using the Bowtie1 aligner. Reads mapping uniquely to the\n+genome are assigned to subfamilies of repetitive elements based on their degree of overlap\n+to RepeatMasker annotated genomic instances of each repetitive element subfamily.\n+\n+Reads mapping to multiple locations are separately mapped to repetitive element assemblies\n+\xe2\x80\x93 referred to as repetitive element psuedogenomes \xe2\x80\x93 built from RepeatMasker annotated\n+genomic instances of repetitive element subfamilies.\n+\n+RepEnrich then return tables of counts merged from both strategies, that can be further\n+processed in statistical analysis for differential expression. For detailed information\n+see the `original publication`_.\n \n .. 
_original publication: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-15-583\n \n@@ -171,7 +132,8 @@\n \n *Sequencing dataset*: Single-reads or Paired-end sequencing datasets in fastq format.\n \n-*RepeatMasker description file*: a txt repeatmasker file which can be downloaded from http://www.repeatmasker.org/genomicDatasets/RMGenomicDatasets.html\n+*RepeatMasker description file*: a txt repeatmasker file which can be downloaded from\n+https://www.repeatmasker.org/genomicDatasets/RMGenomicDatasets.html\n \n This file looks like:\n \n@@ -221,27 +183,29 @@\n  \n **Outputs**\n \n-(1) Fraction counts, (2) Family fraction counts and (3) Class fraction counts are returned in tabular format,\n-for further statistical tests differential expression analysis or graphics.\n-\n-The "aligned_reads.tab" output file contains a single value corresponding to the number of reads that were aligned to\n-transposons. This value is used in downstream analysis by the edger-repenrich tool.\n+(1) Fraction counts, (2) Family fraction counts and (3) Class fraction counts are returned\n+in tabular format for further statistical tests, differential expression analysis or graphics.\n \n **RepEnrich**\n \n-This Galaxy tool is a wrapper of the RepEnrich tool by steven_criscione@brown.edu et al. whose code and manual are available in `GitHub`_.\n+.. class:: warningmark\n \n-.. _GitHub: https://github.com/nskvir/RepEnrich\n+Earlier versions of the RepEnrich.py and RepEnrich_setpup.py scripts of this galaxy wrapper\n+were directly derived from the `nskvir/RepEnrich GitHub repository`_ which is not maintained\n+anymore.\n \n-Python scripts RepEnrich.py and RepEnrich_setup.py have been adapted to python 3. Note that sorting of Fraction counts, Family fraction counts and Class fraction counts is  different with this Galaxy wrapper or with RepEnrich as found in the `RepEnrich code repository`_. 
However, this different sorting does not affect subsequent statistical analyses\n+Starting from 2024, python codes were extensively rewritten for clarity, maintenance and\n+optimization and we now refer exclusively to our `GitHub repository`_ for code review.\n \n-.. _RepEnrich code repository: https://github.com/nskvir/RepEnrich\n+.. _nskvir/RepEnrich GitHub repository: https://github.com/nskvir/RepEnrich\n+.. _GitHub repository: https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich\n \n **Execution time**\n \n .. class:: warningmark\n \n-This tool includes steps to index the reference genome, index repeat sequences and align reads to these indexes. Therefore the run time may be **long to very long**. \n+This tool includes time-consuming steps to index the reference genome, index repeat\n+sequences and to align reads to these indexes.\n \n .. class:: infomark\n \n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/Normalized_counts_file.tab
--- a/test-data/Normalized_counts_file.tab Mon Mar 18 09:39:44 2024 +0000
+++ b/test-data/Normalized_counts_file.tab Tue Apr 02 21:16:37 2024 +0000
b
b'@@ -1,211 +1,211 @@\n Tag\tMutant_1\tMutant_2\tWildtype_1\tWildtype_2\n-5S_DM\t247.942840217287\t356.138990129468\t271.810660587451\t503.611727197063\n-ACCORD_I-int\t160.687012069425\t174.461719477315\t117.166444321613\t104.919109832721\n-ACCORD_LTR\t30.9390054922851\t29.5494000493336\t12.6933900385148\t10.6615261723782\n-ACCORD2_I-int\t89.5272994371693\t103.079302497675\t126.245445332212\t119.118324235025\n-ACCORD2_LTR\t8.34178249348953\t6.52835582485276\t10.1116835900033\t9.30460465953003\n-ALA_DM\t0.352469682823501\t0\t0.215142204042624\t0.290768895610313\n-ARS406_DM\t29.5682900590826\t19.7568663120544\t24.2250121751994\t34.8438059906359\n-Baggins1\t215.398139503251\t157.367735146451\t220.994071992583\t188.660551768492\n-BARI_DM\t43.7454039682056\t44.8394965864887\t42.5981564004395\t43.7122573067505\n-BARI1\t14.490420293855\t10.7374273435078\t16.9962341193673\t15.6045973977535\n-BATUMI_I-int\t184.968256886155\t158.226729333931\t275.166878970516\t232.082040179632\n-BATUMI_LTR\t13.628827735842\t14.5170017684226\t23.8377562079227\t19.5299774884927\n-BEL_I-int\t1148.97283940843\t1304.98396962057\t1022.22666828812\t1059.36800967358\n-BEL_LTR\t36.3043773308206\t51.9691483425779\t37.6498857074591\t41.5799520722748\n-Bica_I-int\t61.1339083208317\t53.0858407863027\t79.6886723773878\t67.3614608163893\n-Bica_LTR\t4.81708566525452\t5.7552610561202\t6.02398171319346\t6.05768532521486\n-BLASTOPIA_I-int\t539.670247700872\t1034.65849882041\t443.580196295081\t462.322544020398\n-BLASTOPIA_LTR\t40.6515034189771\t78.9415658294696\t23.6656424446886\t23.1645886836216\n-BLOOD_I-int\t1071.58616237963\t1140.74428097427\t960.825083254357\t834.603653366803\n-BLOOD_LTR\t49.6982252781137\t46.55748496145\t32.1852737247765\t36.1522660208823\n-BS\t203.649150075801\t355.194096523239\t313.935504138996\t242.355874491196\n-BS2\t204.197436249082\t179.186187508459\t240.744126323696\t241.871259665179\n-BS3_DM\t33.3279666758666\t38.9124366928724\t53.871607892273\t42.5007202417075\n-BS4_DM\t0.31330638
4732001\t1.97568663120544\t1.46296698748984\t1.0176911346361\n-BURDOCK_I-int\t203.257517094886\t191.727502645676\t134.205706881789\t161.473660028927\n-BURDOCK_LTR\t21.2656708636846\t16.5785878183761\t12.3491625120466\t17.930748562636\n-Chimpo_I-int\t53.4970651929892\t31.7827849367832\t55.4206317613799\t51.369171557822\n-Chimpo_LTR\t4.19047289579051\t2.83468081868607\t4.04467343600132\t5.81537791220627\n-Chouto_I-int\t48.2100199506366\t44.4958989114965\t72.7180649664068\t63.1937733126415\n-Chouto_LTR\t6.34445429082302\t13.658007580942\t12.8655038017489\t13.617676611083\n-CIRCE\t143.964283784354\t134.862087434458\t162.088136525713\t161.037506685512\n-Copia_I-int\t1992.94191328026\t7492.14730320603\t1188.10130760498\t1295.52081439175\n-Copia_LTR\t102.294534614998\t264.140712650293\t60.5840446584028\t54.7130138573406\n-Copia1-I_DM\t160.56952217515\t167.16026888373\t154.386045620987\t170.487495792847\n-Copia1-LTR_DM\t23.4979788549001\t33.4148738929964\t33.1749278633726\t27.4291991525729\n-Copia2_I-int\t141.536159302681\t162.521700271335\t181.493963330357\t167.434422388939\n-Copia2_LTR_DM\t29.2549836743506\t39.5137326241088\t38.3383407603955\t23.5522805444354\n-DIVER_I-int\t968.978321379896\t1009.40406970848\t567.028792974739\t757.307588617061\n-DIVER_LTR\t21.4223240560506\t28.1750093493646\t20.1803387391981\t20.7899760361374\n-DIVER2_I-int\t252.44661949781\t229.179649219831\t287.386956160137\t272.692762599872\n-DIVER2_LTR\t25.3778171632921\t20.615860499535\t34.1215535611601\t25.9268931919196\n-DM1731_I-int\t737.405739764856\t590.386705055435\t222.887323388158\t236.54049657899\n-DM1731_LTR\t49.8940417685711\t27.5737134181281\t24.8274103465188\t21.3230523447563\n-DM176_I-int\t185.08574678043\t197.396864283048\t156.064154812519\t136.322150558635\n-DM176_LTR\t21.9314469312401\t28.3468081868607\t38.6825682868637\t42.2584128286989\n-DM297_I-int\t3226.54663986442\t3668.33467763602\t509.327653850507\t558.760894397819\n-DM297_LTR\t239.992690704713\t216.122937570126\t64.0693483638
933\t71.6260712853405\n-DM412\t1711.00533031955\t2019.32353592946\t1036.38302531413\t1142.04329899211\n-DM412B_LTR\t219.588612399041\t257.784155662936\t132.441540808639\t141.022914371002\n-DMCR1A\t684.926920322246\t693.380108134362\t876.919623677734\t870.2'..b'789344043\t303.554556241254\t361.924932338747\t415.117944475564\n+POGON1\t85.9627029802447\t75.30035503659\t49.3601782464464\t67.1318345199247\n+PROTOP\t640.361788971401\t605.638402423199\t736.972812368393\t725.123760955911\n+PROTOP_A\t598.788562881111\t584.75431958102\t526.856524908444\t563.707513685918\n+PROTOP_B\t1256.71839255518\t1264.81065100522\t1295.89108447619\t1380.11724317016\n+QUASIMODO_I-int\t2401.85961057127\t2353.13609489344\t2222.84838955145\t2168.54149325156\n+QUASIMODO_LTR\t217.924169021681\t155.306982262967\t198.633708230413\t195.565195350847\n+QUASIMODO2-I_DM\t533.344258584139\t532.691183481502\t405.17100995648\t370.641021853182\n+QUASIMODO2-LTR_DM\t39.9639399190529\t30.8849112454764\t46.6759389460354\t33.3160469081512\n+R1_DM\t2068.46915876333\t1770.44076939545\t1095.0205101621\t900.699328161867\n+R1-2_DM\t24.6757212923011\t20.0016568065942\t39.8162162894296\t29.4847015137138\n+R2_DM\t7247.42027216596\t4449.78035544349\t2285.62976429995\t1400.44003178414\n+ROO_I-int\t4562.72861699979\t6747.9118941188\t4741.11222655369\t4669.91029511555\n+ROO_LTR\t297.583834497914\t427.388343235021\t244.414900742978\t308.340014134939\n+ROOA_I-int\t550.241763382128\t758.592248591272\t743.385161808263\t764.436696307529\n+ROOA_LTR\t58.3366237073423\t81.1831952738236\t59.6497622313551\t80.4582532831851\n+ROVER-I_DM\t931.642585965304\t902.721834403496\t971.24725353204\t931.016930848285\n+ROVER-LTR_DM\t55.6544800886139\t49.4158579927622\t84.1061647462108\t69.1307973344137\n+S_DM\t667.049117977749\t600.343846209689\t538.637352949137\t579.032895263668\n+S2_DM\t96.0207415504761\t60.2991124316444\t114.080170267467\t122.103311918374\n+SSU-rRNA_Cel\t516.983182509896\t534.456035552672\t516.417816517957\t363.64465200247\n
+SSU-rRNA_Dme\t1335.43930776486\t1124.21076933534\t1060.72189687907\t691.80771404776\n+SSU-rRNA_Hsa\t75592.5969999742\t88775.8830260092\t47428.2715721784\t28668.4583644641\n+Stalker2_I-int\t1470.61934614877\t1415.99964510213\t1827.07221714641\t2077.92184566139\n+Stalker2_LTR\t105.944672939771\t104.714556222758\t108.26431844991\t111.941917611388\n+Stalker3_LTR\t72.8201992484756\t69.1233727874948\t88.430772507984\t73.1287229633919\n+STALKER4_I-int\t977.239027483686\t921.252781150781\t925.615185425053\t956.33712649848\n+STALKER4_LTR\t273.578649110295\t236.196035524929\t192.668732007277\t201.562083794315\n+TABOR_I-int\t738.125923874051\t1085.67816578146\t588.594028817897\t513.233702620069\n+TABOR_LTR\t59.4094811548337\t25.8844970438278\t57.2637717421009\t45.4764040296264\n+TAHRE\t510.411930644011\t416.799230808\t680.007289437449\t651.995037992519\n+TART_B1\t1794.62229529116\t1496.88869836409\t3765.09299204314\t3583.64058567528\n+TART-A\t682.873765328246\t536.809171647566\t551.3129274233\t567.705439314896\n+TC1_DM\t351.092599691545\t436.800887614595\t274.388906264234\t326.497259699882\n+TC1-2_DM\t595.167668995828\t672.702781127662\t567.716612036923\t657.658765966905\n+TIRANT_I-int\t3106.59284639215\t3743.83952697546\t1627.99113569926\t1425.76022743433\n+TIRANT_LTR\t432.495658519952\t469.450650931241\t204.44956004797\t213.056119977627\n+TLD2\t4.42553697090183\t0.29414201186168\t9.69308636259521\t2.83186398719285\n+TOM_I-int\t929.228656708448\t997.141420211095\t382.205851497408\t384.966922023687\n+Transib-N1_DM\t49.0832282227294\t38.2384615420184\t45.9303169181435\t61.8012670146205\n+TRANSIB1\t164.817725370859\t85.0070414280255\t310.477012414204\t352.48377628824\n+TRANSIB2\t450.197806403559\t436.506745602733\t476.750724634106\t512.400801447365\n+TRANSIB3\t241.661140047427\t217.959230789505\t176.563296204811\t188.568825500136\n+TRANSIB4\t48.8150138608566\t45.003727814837\t94.6939975422763\t95.1173139227717\n+Transib5\t130.486287051136\t96.478579890631\t167.46670746453\t160.
749926331829\n+TRANSPAC_I-int\t3818.5678699836\t4929.82011880175\t1442.62949956532\t2495.70507388961\n+TRANSPAC_LTR\t195.394162624363\t287.965029612585\t80.2289302011727\t92.4520301701196\n+XDMR\t237.503817438398\t265.610236711097\t186.256382567406\t239.875537738689\n+XDMR_DM\t542.999975611561\t466.803372824486\t494.645653303513\t720.959255092392\n+ZAM_I-int\t924.803119737546\t850.658698303978\t764.859076211551\t737.284118077386\n+ZAM_LTR\t50.55840721303\t34.7087573996782\t38.6232210448025\t37.4805527716701\n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp-paired_class_fraction_counts.tab
--- a/test-data/Samp-paired_class_fraction_counts.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-Low_complexity 2.0
-Simple_repeat 121.0
-rRNA 111.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp-paired_family_fraction_counts.tab
--- a/test-data/Samp-paired_family_fraction_counts.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-Low_complexity 2.0
-Simple_repeat 121.0
-rRNA 111.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp-paired_fraction_counts.tab
--- a/test-data/Samp-paired_fraction_counts.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-A-rich Low_complexity Low_complexity 2
-LSU-rRNA_Cel rRNA rRNA 111
-_ATA_n Simple_repeat Simple_repeat 0
-_ATTTAT_n Simple_repeat Simple_repeat 1
-_ATT_n Simple_repeat Simple_repeat 11
-_AT_n Simple_repeat Simple_repeat 0
-_CTAATT_n Simple_repeat Simple_repeat 29
-_TAT_n Simple_repeat Simple_repeat 0
-_TA_n Simple_repeat Simple_repeat 0
-_TTA_n Simple_repeat Simple_repeat 80
-_T_n Simple_repeat Simple_repeat 0
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp.fastq
--- a/test-data/Samp.fastq Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,10040 +0,0 @@\n-@HISEQ:262:CA81LANXX:2:1101:1338:1996\n-GCGGGTGATAAACTTCTGTGAAAAAAAGCTCAAAAAAATCTCACAAAAAATAAAACTTCTGATAAAATAAATAAAATTATTCCTCATCGTAAACCAATAGTTACTGCATAAGTATGTAATCCTTG\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFF\n-@HISEQ:262:CA81LANXX:2:1101:7367:1984\n-GGGTCTTCTCGTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAA\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFF<FBF/FFFFFFFFFFFFFFB/FFFFBFFFFFFFFFFB<FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF<FFFFFFFFFFFFF<FFFFFBF\n-@HISEQ:262:CA81LANXX:2:1101:19502:1983\n-GTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCTACCTTTGCACAGTCAAAATACTGCGGCCATTTAAAATTTTCAGTGGGCGGGAGATCGGAAGAGCACACGTCTGACCT\n-+\n-B<BBBFFFFFFF///FBF///</<<<B/BBF<BFFFFB<//<FBFFFFFBFFF//</</<<B/FB</<B///<<BF//</<FFBF</BF//<FB###############################\n-@HISEQ:262:CA81LANXX:2:1101:1225:2201\n-TCGTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATG\n-+\n-<<BBBBFFFFFFFB<BFFFFFFFFFFFFF/B/FFFFFF/B<BFBFFFFFFFFFFFFFFFB/</FFB/FFFFFFFFFFFFBFFFBFFFF/BFFFBF//B<FFFBBFFF<<BFFFBFFFF/BBFFFB\n-@HISEQ:262:CA81LANXX:2:1101:3596:2043\n-AGGGTCTTCTCGTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTA\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFBFFF/F/FFFBFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFF<FFFFFFFFFB<FFFFFFFFFBFB/FFFF\n-@HISEQ:262:CA81LANXX:2:1101:4635:2158\n-TCGTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATG\n-+\n-BBBBBFFFFFFFFF<FFFFFFFFFFFFFFBFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB/BFFFFFFFFFFFFF<FF/BFF\n-@HISEQ:262:CA81LANXX:2:1101:4872:2211\n-GCCGATTCTTTTTAAAGTTACAGATGGTTCATTAATTTCATCTAATAAATAAAGTAAACGTAAAGAAGGAAGAGCAATAAATAGTAAAATAATTGCTGGTAAAATAGTTCAAATTATTTCAATAA\n-+\n-BBB
BBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFF\n-@HISEQ:262:CA81LANXX:2:1101:5610:2019\n-GTCCTTTCGTACTAAAATATCATAATTTTTTAAAGATAGAAACCAACCTGGCTTACACCGGTTTGAACTCAGATCATGTAAGAATTTAAAAGTCGAACAGACTTAAAATTTGAACGGCTACACCC\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n-@HISEQ:262:CA81LANXX:2:1101:6566:2127\n-CTAACCAAGAAGTTCTTAAGATAATATTAAATCGAATTAATAAATATACACCAGCTGTAACTAATGTAGAAGAATGAACTAAAGCAGAAACAGGTGTAGGAGCAGCTATAGCTGCAGGTAACCAA\n-+\n-BBBBBFFFFF<FFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFBFFFFFFFFFFFFFFFBFFFFFFFFF\n-@HISEQ:262:CA81LANXX:2:1101:6892:2032\n-ACCGTCAACTTTTACTCCTAAAGCAGGTACTGTTCAAGAATGAATAACATCAGCAGCTGTTACTAAAATTCGAATTTGTGAGTTTATGGGTAAAACTACTCGGTTATCAACATCTAATAATCGAA\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFF\n-@HISEQ:262:CA81LANXX:2:1101:7493:2161\n-GTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCTACCTTTGCACAGTCAAAATACTGCGGCCATTTAAAATTTTCAGTGGGCAGGTTAGACTTTATATATAATTCAAAAAG\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFF\n-@HISEQ:262:CA81LANXX:2:1101:7572:2014\n-CTAAAATTGAAGAAATTCCTGCTAAATGTAGAGAAAAAATAGCTAAATCAACTGAAGCTCCACCATGAGCAATTCCAGCGGATAGAGGTGGATAAACAGTTCATCCTGTCCCAGCTCCATTTTCA\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFBBBF<F/<BFFFFFFFFFFFB\n-@HISEQ:262:CA81LANXX:2:1101:9943:2018\n-GCTGCTTCAAAACCAAAATGATGATTTTTTGAGAAGTGATTATTTAAATGTCGTAGTAAACATACTAATAAAAAAGTTGTTCCGATTAATACATGAATTCCGTGAAATCCTGTTGCTATAAAAAA\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFF<FFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFF\n-@HISEQ:262:CA81LANXX:2:1101:9821:2111\n-GTCCTTTCGTACTAAAATATCATAATTTTTTAAAG
ATAGAAACCAACCTGGCTTACACCGGTTTGAACTCAGATCATGTAAGAATTTAAA'..b'\n-@HISEQ:262:CA81LANXX:2:1101:13847:33438\n-GTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCTACCTTTGCACAGTCAAAATACTGCGGCCATTTAAAATTTTCAGTGGGCAGGTTAGACTTTATATATAATTCAAAAAG\n-+\n-BBBBBBFB<FFFFBFFFBFBFFFFF<BFFFFBFFF/<F<F<FFF/FFF<FBFFFBFFF<FFFFF/</<FB/<B/<///<F/BFFFFFFFFFFF<B</BFFF/7BF<FFFFFF<BFFFBF/7/7B7\n-@HISEQ:262:CA81LANXX:2:1101:15264:33251\n-TAAGAATTTAAAAGTCGAACAGACTTAAAATTTGAACGGCTACACCCAAAATTATATCTTAATCCAACATCGAGGTCGCAATCTTTTTTATCGATATGAACTCTCCAAAAAAATTACGCTGTTAT\n-+\n-BBBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n-@HISEQ:262:CA81LANXX:2:1101:16362:33356\n-GTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCT\n-+\n-BBBBBFFFFFFFFFBFFFFFFFFFFFBFFBF/FBFFFBFFF/FFFFFFFFFFFFFFFFFBBFBFFFFFFFFFFFFFFB<<FFFFFFFFFBFF/BFFBBFFFFFFFF//F/FBFB/F7F<FFF/BF\n-@HISEQ:262:CA81LANXX:2:1115:5809:65592 1:N:0:CGTACG\n-CCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATT\n-+\n-BBBBBFFFFFFFFFFFFFFBFFFF/F<FFFFFFFFFB/FFFFFFFFFFFFFFFFFFFFFF<B<<BFFFFFFFFFF<FFFFFFF/<F<F///BFFF//FBFFF7BF<FFFF//<</<FFFB77/BF\n-@HISEQ:262:CA81LANXX:2:1215:17203:64615 1:N:0:CGTACG\n-CCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATT\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBF/FFBFFFFFFFBF\n-@HISEQ:262:CA81LANXX:2:1215:14166:68404 1:N:0:CGTACG\n-CCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAATTATTTATTTAATTTTTATTAATAAAATATTTTTTTAATTAATTATTTTATGAATAAAATTTT\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFBBBFFFBFFFFBBFFFFFFFFFFBBF/BFFFFFFF/B#############################################################\n-@HISEQ:262:CA81LANXX:2:1301:20643:41959 
1:N:0:CGTACG\n-CATATATATAGATATATATATATATATATCAGGGATCGTGGCGGAGCGAGGTGGAAGGGGTGGGGGGGGAGAGGATAACCAGGGGGCGGGGGGGGAGGTTTGGGAGTCATAGAAAATTTGGGATA\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFF##################################################################################################\n-@HISEQ:262:CA81LANXX:2:1310:4460:5881 1:N:0:CGTACG\n-CCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATT\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFBF7<77<FFFFFFFBFFFFFFFFBBFBFFFFFFFFFFFBF<F<FFFFFFB7BF\n-@HISEQ:262:CA81LANXX:2:1314:2827:42596 1:N:0:CGTACG\n-CCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTA\n-+\n-BBBBBFFFFFBFFFFFFFFF<FFFFBFFFFFBBFF/BB/FFFF/FFFBFFBBFFFFFFFFF<</<BB//</B7FF/<//<<<BBBFFBFF/BBFFFBFFFFFB/BFBFFB<F/7BB/FF/7FFFB\n-@HISEQ:262:CA81LANXX:2:2103:4469:38754 1:N:0:CGTACG\n-ATTTTTTTTTTTTTTTTTTTTTTTTATTAAAATAATTTATTGTTTTGTGTTTTTTTTTTTTTTTTTTATTTTTAAATTTATATTAATTATTAATTTATATAAAATATAATATAAAAAAATTATAA\n-+\n-BBBBBFFFBFFFFFFFFBF<BB#######################################################################################################\n-@HISEQ:262:CA81LANXX:2:2108:19262:60414 1:N:0:CGTACG\n-CCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAGTTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATT\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF<<<FFFFBFFFFFFFFFFFFFFFFFBFFFFBFFFFFFFFF<B<BBFFFBFFFF\n-@HISEQ:262:CA81LANXX:2:2110:11698:64422 1:N:0:CGTACG\n-ATTTTTTTTTTTTTTTTTTTTTTTAATTTTAAAAAATTTTTTTTGTTTTTTTTTTTTTTTTTTTTAATTTTAAATTTATATTTAAATTTTTTTTTTATAAAAATTAAAAAAAAAAAAATAAAAAG\n-+\n-BBBBBFFFFFFFFFFFFFFFFF#######################################################################################################\n-@HISEQ:262:CA81LANXX:2:2110:8000:79787 
1:N:0:CGTACG\n-CCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTAGTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTT\n-+\n-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/FFFFF/<FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFBFFFFFFFFFFFFFFFFFFF/FBFFFBFFFFFFF\n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp_L.fastq
--- a/test-data/Samp_L.fastq Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,10000 +0,0 @@\n-@ERR1044075.680 HWI-ST790:248:H0G15ADXX:1:1101:10892:2723/1\n-GCTCCTAAAATTGAAGAAATTCCTGCTAAATGTAGAGAAAAAATAGCTAAA\n-+\n-CCCFFFFFHHDHHJIGGIIIGIIJJJGIJIGIAEFFHJIIIIGEEIICFHI\n-@ERR1044075.759 HWI-ST790:248:H0G15ADXX:1:1101:15792:2653/1\n-GAAAATAATCATTCACAGACTACCCAAGGATTATTTTTTACAGTTTTACTA\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJJ\n-@ERR1044075.753 HWI-ST790:248:H0G15ADXX:1:1101:15644:2508/1\n-CTTATCTTACCTTAATAATAAAAGCGACGGGCGATGTGTACATATTTTAGA\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJIJIJJFHHIIJIJJJJJIHH\n-@ERR1044075.814 HWI-ST790:248:H0G15ADXX:1:1101:19529:2745/1\n-TGAGTCTGCAATAGTAAATGGAGCTTCAATATATTCATAAGCTTGAAGAAT\n-+\n-?@?DDDFFHHGHHJHHIGJIEDBGGIHIJGIFCHEHI>GAEGIIGIEDHEH\n-@ERR1044075.946 HWI-ST790:248:H0G15ADXX:1:1101:8484:2908/1\n-CATACACAACATATATTTGCTCATTTAGTTCCTCAAGGAACACCCGCTATT\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJIIJJJJJJJJIJJJIJJJGIIJJJ\n-@ERR1044075.818 HWI-ST790:248:H0G15ADXX:1:1101:19811:2591/1\n-ATTTTTTCCTCAACATTTTTTAGGATTGGCTGGAATACCTCGAGGTTTTTC\n-+\n-@@CFFDD?FHDDDC@FHEGGG9CGDEGGGFHEI)09*?B<*7?########\n-@ERR1044075.978 HWI-ST790:248:H0G15ADXX:1:1101:10873:2966/1\n-CTCATCAGTAAATTGTGATATATAAAAATAATCAAACTACATCGACAAAAT\n-+\n-CCCFFFFFHHHHHJJHIJJJJJJJJJJJJJJJJJJJJIJJJIJJJJJJJJJ\n-@ERR1044075.998 HWI-ST790:248:H0G15ADXX:1:1101:12703:2936/1\n-TTTTCATTTAGTGGATTATAGTCCATGACCATTAACAGGAGCTATCGGAGC\n-+\n-BCCFDFFFHHHDHIIIJJJIJGHJJJJFJIFIIJIJJJIGGHJIIGIIIJI\n-@ERR1044075.1137 HWI-ST790:248:H0G15ADXX:1:1101:1525:3138/1\n-TTGGAAGATTAGTAATATTAGCTGCTATAACTAAAAGAGCTCAGATTCCTT\n-+\n-BB@DDDEBFHHHDGIJJJJIJIJIJHHIJGGJJJJIGDFGGHEGCGHJGIJ\n-@ERR1044075.1446 HWI-ST790:248:H0G15ADXX:1:1101:1567:3357/1\n-TAATCCAGCTATAAATATTGTTAATCCAGATAATAATAATATTAATTGTCC\n-+\n-@@CFFDFFHHGHHIBFGHIFC>HHHIHCFGHC>FH@HG<DFHGHEGHC?CG\n-@ERR1044075.1652 HWI-ST790:248:H0G15ADXX:1:1101:16002:3269/1\n-CTTGAGCCCACCATAGACTTATAGAAAATAATCATTCACAGACTACTCAAG\n-+\n-BCCFFFFFHHHHHJIJIJJJJJJIJJJJGIJGIJHIIJIJHGGIIJEGIII\n-@ERR1044075.2019 
HWI-ST790:248:H0G15ADXX:1:1101:20598:3595/1\n-CTAAAATTGAAGAAATTCCTGCTAAATGTAGAGAAAAAATAGCTAAATCAA\n-+\n-@@@DDDE>DHBHHJJJIIJJGIJGIGHGGHHGCHGGIIGFGHIEGHGHIEC\n-@ERR1044075.2025 HWI-ST790:248:H0G15ADXX:1:1101:1366:3788/1\n-CTACCTTTGCACAGTCAAAATACTGCGGCCATTTAAAATTTTCAGTGGGCA\n-+\n-@@CDDDFFHHBF<GG<FHGIJFIJJJIGJFIGHIJJJGIIIEDDBDHIIC?\n-@ERR1044075.2112 HWI-ST790:248:H0G15ADXX:1:1101:7863:3843/1\n-CCGTGGGAATGCTATATCAGGAGCACCTAATATTAAAGGCACTAATCAATT\n-+\n-CCCFFFFFHHHHHJJIJIIIHIIIIJIJJIJJJJJJJJJHIIIGHHIIHJJ\n-@ERR1044075.2117 HWI-ST790:248:H0G15ADXX:1:1101:8122:3811/1\n-TAAGATTTTGACTTCTACCTCCTGCTCTTTCTTTACTATTAGTAAGTAGAA\n-+\n-BBBFDFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJFIGIGIEFFGIBGEGG\n-@ERR1044075.2555 HWI-ST790:248:H0G15ADXX:1:1101:1876:4399/1\n-CTGTTACTAAAATTCGAATTTGTGAGTTTATGGGTAAAACTACTCGGTTAT\n-+\n-CCCFFFFFHHHHHJJJJJJJJJIJHIJIJHHJJICHIIIIHIGIGHGJIGI\n-@ERR1044075.3073 HWI-ST790:248:H0G15ADXX:1:1101:1378:4822/1\n-TTTATTTGTTTGATCAGTAGTTATTACTGCTTTATTATTATTATTATCACT\n-+\n-BB@ADDEFFHHHHIGIIJGIIIJJJIIIGGIJGHGIAHG@FI<EF?D?FGH\n-@ERR1044075.3103 HWI-ST790:248:H0G15ADXX:1:1101:3753:4970/1\n-TAGATACTCGAGCTTATTTTACCTCAGCTACTATAATTATTGCAGTTCCTA\n-+\n-@BCFFFFFHHFHHJJIJJJJJJIJIJJHIIIIGIJJJIGIIHIHGIJJDCA\n-@ERR1044075.3115 HWI-ST790:248:H0G15ADXX:1:1101:4946:4934/1\n-GCTGAGGTAAAATAAGCTCGAGTATCTACATCTATTCCAACGGTAAATATA\n-+\n-CCCFFFFDFHHHHJJJJIJJJJHFHIJJJJJIJIJJJIIIGHJFHIJIHGH\n-@ERR1044075.3167 HWI-ST790:248:H0G15ADXX:1:1101:8133:4808/1\n-CTGGGTCAAAAAATGATGTATTTAAATTTCGATCTGTTAATAATATAGTAA\n-+\n-CCCFFDFFHHHHHJJIJJIJJJJJJJJJJJJJJJJJJJJIJJJJIJIIFGI\n-@ERR1044075.3206 HWI-ST790:248:H0G15ADXX:1:1101:9789:4942/1\n-TCATCATATATTTACCGTTGGAATAGATGTAGATACTCGAGCTTATTTTAC\n-+\n-8?<:DDBD<D?A:A<A)CC7B9AFEFE<A2+22@9:CE7D?DDB*99?D##\n-@ERR1044075.3251 HWI-ST790:248:H0G15ADXX:1:1101:12710:4758/1\n-TTTAGCTAATTCATCAGTAGATATTATTTTACATGATACTTATTATGTAGT\n-+\n-BCCFFFFFHHHHHJJJJGHJIJJJJJJJJJIIGJJGIIIIJGHIIJJFHEH\n-@ERR1044075.3552 
HWI-ST790:248:H0G15ADXX:1:1101:10536:5030/1\n-CTCTTTCTTTACTATTAGTAAGTAGAATAGTTGAAAATGGAGCTGGGACAG\n-+\n-CCCFFFFFHHHHHIJJJJHIJJIHJIJJJJJJJJJJJIIIIJIIJJIIJJJ\n-@ERR1044075.3658 HWI-ST790:248:H0G15ADXX:1:1101:18523:5017/1\n-TCT'..b'JIJGG\n-@ERR1044075.659985 HWI-ST790:248:H0G15ADXX:1:1106:11631:62123/1\n-AAAGAATCGGCCATCAATGATATTGAAGTTACGAATATTCAGATTTTAATA\n-+\n-<<@DDDDDBF1CDC:C?,C9AAC42?C@H@AGG1?DFGEFF@HGGGCEHE?\n-@ERR1044075.660170 HWI-ST790:248:H0G15ADXX:1:1106:7215:62363/1\n-TGGAAGTGATAATAATAATAATAAAGCAGTAATAACTACTGATCAAACAAA\n-+\n-BBCDDFDDHHHHHIJJJIIJJIIJJJIJJIJJJJJIIIJJIIJGIJJGJJJ\n-@ERR1044075.660255 HWI-ST790:248:H0G15ADXX:1:1106:12160:62309/1\n-CAGCTGTTACTAAAATTCGAATTTGTGAGTTTATGGGTAAAACTACTCGGT\n-+\n-CCCFFF=DFHFGHJIGHGIGEHIHFCEFGGHJHIJJJ:DGGIIGGEHGCHD\n-@ERR1044075.660247 HWI-ST790:248:H0G15ADXX:1:1106:11554:62387/1\n-CTTGAGCCCACCATAGACTTATAGAAAATAATCATTCACAGACTACTCAAG\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJJIIJJJIIJIJJJJJJJJJIJJJJ\n-@ERR1044075.660686 HWI-ST790:248:H0G15ADXX:1:1106:8134:62985/1\n-TATCATTCAATTGATGAATTTAGTTGAATTGGGTAAATTACTTGTCGTTGT\n-+\n-CCBFFFFFHHHHHJJJJJJJJIIJJJJJJJIJJCFGHJJIIJJJHIGHIGG\n-@ERR1044075.661147 HWI-ST790:248:H0G15ADXX:1:1106:8314:63385/1\n-AATTGGAAGATTAGTAATATTAGCTGCTATAACTAAAAGAGCTCAGATTCC\n-+\n-@@CFFDEDHHFFHIACGIJGIJIGIIGGHGGGHGIIJGHHGIJJIIJIJHH\n-@ERR1044075.661472 HWI-ST790:248:H0G15ADXX:1:1106:13982:63648/1\n-TACAAATCATAAAGATATCGGAACTTTATATTTTATTTTTGGAGCTTGAGC\n-+\n-CCCFFFFFHHHHHJIJJJJJJJJJJJJJIIJJJJIJJJJJIIJJIJJGIJI\n-@ERR1044075.661879 HWI-ST790:248:H0G15ADXX:1:1106:7696:64091/1\n-TTCATGTAAAGTAGCTAATCAACTAAAAATTTTAATTCCAGTAGGAACTGC\n-+\n-@BBDFEBDFHHHHIIIJJIHIJIJGIJJJJIJIJGIIIIJIEFHHIJJIII\n-@ERR1044075.662140 HWI-ST790:248:H0G15ADXX:1:1106:8516:64305/1\n-CAGCAATTATATTAGCAGTTAATCGAACAGCTAATGTTCCAGGTCGAATAA\n-+\n-@@CFFFFFFHFHHJJBEHGCGHGCHIJEHG>FGIGHEDHHJHI??F@BDHH\n-@ERR1044075.662813 
HWI-ST790:248:H0G15ADXX:1:1106:21170:64761/1\n-CCAAGACGTTCATAAGATACATTAGCTAAACAAAATAACCCAGAAGAACAT\n-+\n-?@BF?DEAFFDDFIJJJIJIGJIIIJIHHJGHHIJJJIJJIIGIJCHDFDD\n-@ERR1044075.663073 HWI-ST790:248:H0G15ADXX:1:1106:5648:65498/1\n-GATAAACTTCTGTGAAAAAAAGCTCAAAAAAATCTCACAAAAAATAAAAAT\n-+\n-?@<DDD>DF4CFACFBEEGH:G)@:?DGGGG<F9B?F4C>=@C@CCEGH##\n-@ERR1044075.663162 HWI-ST790:248:H0G15ADXX:1:1106:11759:65389/1\n-CAGAATATCTATGTTCAGCTGGTGGAGTATTTTGGTATCATTCAATTGATG\n-+\n-CCCFFFFFHHHHHIJJJJJJJJHHIHJFGHIJJJJFHIIJIJIJIIJJIJJ\n-@ERR1044075.663390 HWI-ST790:248:H0G15ADXX:1:1106:10400:65584/1\n-CAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCTACCTTT\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJIJJJIJJJJIJGIJJ\n-@ERR1044075.663400 HWI-ST790:248:H0G15ADXX:1:1106:11192:65654/1\n-ATTGAAGCTCCATTTACTATTGCAGACTCAATTTATGGATCAACATTTTTT\n-+\n-@CCFFFFBHHHHGJJJJJJIIIIIEGIIIFHIIHFIIJGHGIIIJIJJJJJ\n-@ERR1044075.664278 HWI-ST790:248:H0G15ADXX:1:1106:20222:66346/1\n-CTAAAATTCGAATTTGTGAGTTTATGGGTAAAACTACTCGGTTATCAACAT\n-+\n-@@@FFFFFHGDHHIGIIIIGHHGIGIJJAFGHEGHIIEIJJEHIIJIJIJJ\n-@ERR1044075.664837 HWI-ST790:248:H0G15ADXX:1:1106:12794:67053/1\n-CATATATTTGCTCATTTAGTTCCTCAAGGAACACCCGCTATTCTTATACCT\n-+\n-@@CFFDEFHHHHHJJJJJJIIJJJJJJJJJJJJJJJJJJJJJIJJIIIJII\n-@ERR1044075.665607 HWI-ST790:248:H0G15ADXX:1:1106:15165:67769/1\n-CTCGTGATACATCTCGTCATCATTGATATACAGTTAAAATAGTAATAATAT\n-+\n-@?@DFDFDHHHFHIJIDEFEHCHIEHIIHIHIJIGGGGGDHIGHGGII@GG\n-@ERR1044075.665680 HWI-ST790:248:H0G15ADXX:1:1106:19772:67837/1\n-GTTAAATAATAAATGATTAAAAAGTCATTTCATTATTATATTTATTGGAGT\n-+\n-@?@DDFFFHHDHHGFFGIJECF>BBHEIJJJIIGAHHIIJIGGIGIHGAFG\n-@ERR1044075.665714 HWI-ST790:248:H0G15ADXX:1:1106:6180:68224/1\n-ATTTCCTAAAAGAGTTAATAATAAATGTCCAGCAATTATATTAGCAGTTAA\n-+\n-4114422=:CBFCEEFEAHH@4FFHCH<CAEF+A@E9C9:C?C<:?BFD@4\n-@ERR1044075.665736 HWI-ST790:248:H0G15ADXX:1:1106:7267:68163/1\n-AGAGAACCAAAAGTTTCCTTTTTTCCTGATTCTTGTCTAATAATATGAGAA\n-+\n-?@BDDFFFHHHHHJIJJJJJJJJJJGJEHIJGIJGHIGGECHHHGGADDHI\n-@ERR1044075.665878 
HWI-ST790:248:H0G15ADXX:1:1106:16309:68134/1\n-CATGACCATTAACAGGAGCTATCGGAGCTATAACAACTGTATCAGGTATAG\n-+\n-CCCFFFFFHHGHHJJIGHJJJJJJJJIJJJIJJJIHIJIFHGEGIJ9BGII\n-@ERR1044075.666310 HWI-ST790:248:H0G15ADXX:1:1106:13497:68695/1\n-TAAAAAAACAAATCCTAAAGCTCATAAAATAGCTGGAGAATAAGAAAGTTG\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJJJIIJJIJJJJJJJHII\n-@ERR1044075.666724 HWI-ST790:248:H0G15ADXX:1:1106:8127:69208/1\n-GCTCAAGTTACAGTAACTCCTGAAGCTAATAAAATAGCTGTATTTAATAAA\n-+\n-CCCFFFFFHHGHHIIJHJJJJJJJJIJJJJJJJJIJJJJJHHIJJJIGIJI\n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp_R.fastq
--- a/test-data/Samp_R.fastq Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,10000 +0,0 @@\n-@ERR1044075.680 HWI-ST790:248:H0G15ADXX:1:1101:10892:2723/2\n-ATTTTTTTTATAGTTATACCTATTATAATTGGTGGATTTGGAAATTGATTA\n-+\n-@<??DDADFFFFFDG@<G<F@GE??C:CG>FE*??0?DGFI<B>FDE<FFB\n-@ERR1044075.759 HWI-ST790:248:H0G15ADXX:1:1101:15792:2653/2\n-TGCTGCTTCAAAACCAAAATGATGATTTTTTGAGAAGTGATTATTTAAATG\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJIJJJJJJJGIIIJFGHIJGIJJJJIII\n-@ERR1044075.753 HWI-ST790:248:H0G15ADXX:1:1101:15644:2508/2\n-GGAACCTGTTTTTTAATCGATAATCCACGATGGACCTTACTTAAATTTGTA\n-+\n-@@CFFFFFHHHHHJJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJIHI\n-@ERR1044075.814 HWI-ST790:248:H0G15ADXX:1:1101:19529:2745/2\n-ATTTAATCCATTTCAAATTCCTTTATTAAATACAGCTATTTTATTAGCTTC\n-+\n-;@<DDFFFHHHFDHIIJIGHIHEGHIIIJJIJIJICHJIJIGIGJIE@GGE\n-@ERR1044075.946 HWI-ST790:248:H0G15ADXX:1:1101:8484:2908/2\n-ATTATATTAGCAGTTAATCGAACAGCTAATGTTCCAGGTCGAATAATATTT\n-+\n-CCCFFFFFHHGHHJJJJJJJJJJJJJJJJJJIJJJJIJGII?BBFHGGIIJ\n-@ERR1044075.818 HWI-ST790:248:H0G15ADXX:1:1101:19811:2591/2\n-GAGTATTTTGGTATCATTCAATTGATGAATTTAGTTGAATTGGGTAAATTA\n-+\n-@@BDDFFFHHGGHJJJIJIJIJIGIIJIGIJJDEHEHEGHJGGG:CGDHIE\n-@ERR1044075.978 HWI-ST790:248:H0G15ADXX:1:1101:10873:2966/2\n-AATTCATGTATTAATCGGAACAACTTTTTTATTAGTATGGTTACTACGACA\n-+\n-CCCFFFFFHHHHHJJJJJIJIJJIJJJJJJJJJIIHIJJIJJJIIFJJJJI\n-@ERR1044075.998 HWI-ST790:248:H0G15ADXX:1:1101:12703:2936/2\n-ATGTTCCTTCTCGTGATACATCTCGTCATCATTGATATACAGTTAAAATAG\n-+\n-CCCFFFFFHHHGHJJJJJJJJJJJJIJJJIJJJIGIIJJJJJJJIJJIJII\n-@ERR1044075.1137 HWI-ST790:248:H0G15ADXX:1:1101:1525:3138/2\n-GATAATAATAATATTAATTGTCCTAACCAAGAAGTTCTTAAGATAATATTA\n-+\n-@BCFFFFFHHFHHHJJJIIGHHHIJJFIHHHGGGGGGIDHGGI@GFC>DDI\n-@ERR1044075.1446 HWI-ST790:248:H0G15ADXX:1:1101:1567:3357/2\n-TGCAGCTATAGCTGCTCCTACACCTGTTTCTGCTTTAGTTCATTCTTCTAC\n-+\n-@@CDDDDDFHHHFHIIIJJJGGGCGJCHIIAHGIIEHIEE@FIIBHGHEHC\n-@ERR1044075.1652 HWI-ST790:248:H0G15ADXX:1:1101:16002:3269/2\n-CCGATTAATACATGAATTCCGTGAAATCCTGTTGCTATAAAAAATGTTGAT\n-+\n-@@BDDFFFHHHHHIIIIIJJIGHGIIEIGIIHIIIJGIIJJJIJIIGIEHI\n-@ERR1044075.2019 
HWI-ST790:248:H0G15ADXX:1:1101:20598:3595/2\n-CTGCTCTTTCTTTACTATTAGTAAGTAGAATAGTTGAAAATGGGGCTGGGA\n-+\n-:41A:ADBFFDFFFBE4A<,<<CFCAF>,3<<FE4+9A?:<?E):?:?A=F\n-@ERR1044075.2025 HWI-ST790:248:H0G15ADXX:1:1101:1366:3788/2\n-AATTTATATGAAAAGTTTAAATAAAGAATTCGGCAAAAATAATATTCGCCT\n-+\n-CCCFFFFDHFBHFGIHIIHGGHIGGGAAFHHHGHIGGGGGGEGEDH4?FEI\n-@ERR1044075.2112 HWI-ST790:248:H0G15ADXX:1:1101:7863:3843/2\n-TGGAGATGATCAAATTTATAATGTAATTGTAACTGCACATGCTTTTATTAT\n-+\n-?@BBDBDFHHHHGIJJIJJJHJGJJIJJJFIIIIJIGIIGIGGIJIEIIII\n-@ERR1044075.2117 HWI-ST790:248:H0G15ADXX:1:1101:8122:3811/2\n-CTAAAATTGAAGAAATTCCTGCTAAATGTAGAGAAAAAATAGCTAAATCAA\n-+\n-CCCFFFFFHHGHHJJJJJJJJJJJJJJJHIJIIJJJJJJJJJJJJJJGIIJ\n-@ERR1044075.2555 HWI-ST790:248:H0G15ADXX:1:1101:1876:4399/2\n-GTAACTTTAAAAAGAATCGGCCATCAATGATATTGAAGTTACGAATATTCA\n-+\n-BB@FFFFFHHHHHJJJJJJJJJJJJJJJIIIIIIBHIIHIIJIIIDHIJII\n-@ERR1044075.3073 HWI-ST790:248:H0G15ADXX:1:1101:1378:4822/2\n-CTCCCGCTGGGTCAAAAAATGATGTATTTAAATTTCGATCTGTTAATAATA\n-+\n-CC@FFFFFHHHFHIJJJJIJJIIJGHIIIJJGJJJHIHJGIGIJJIIJIIG\n-@ERR1044075.3103 HWI-ST790:248:H0G15ADXX:1:1101:3753:4970/2\n-CTCCTGTTAATCCTCCTACTGTAAATAAAAAAACAAATCCTAAAGCTCATA\n-+\n-CCCFFFFFHHHHHJJJJIIJIHIJIIJJJJJJIGIJIIJJIJJJJGGGGHI\n-@ERR1044075.3115 HWI-ST790:248:H0G15ADXX:1:1101:4946:4934/2\n-AAAAAAGGAAACTTTTGGTTCTCTAGGAATAATTTATGCTATATTAGCTAT\n-+\n-CCCFFFFFHHHHHJJJJJHJJJJIJJJIIIJJJJJJJJJIIGGIIIIIIJI\n-@ERR1044075.3167 HWI-ST790:248:H0G15ADXX:1:1101:8133:4808/2\n-TGGAGCTTCAGTTGATTTAGCTATTTTTTCTCTACATTTAGCAGGAATTTC\n-+\n-CCCFFFFFHHHFHIFIJJJJJJJJJJJJJJJJJJJIIJJJIJJJJJJJJJI\n-@ERR1044075.3206 HWI-ST790:248:H0G15ADXX:1:1101:9789:4942/2\n-CTGTAAATAAAAAAACAAATCCTAAAGCTCATAAAATAGCTGGAGAATAAG\n-+\n-@@<;DDDDFHDFFDG<BEHFFF@E?FF:?FEC>BBFBB9?BBGH8?;*=F4\n-@ERR1044075.3251 HWI-ST790:248:H0G15ADXX:1:1101:12710:4758/2\n-GAATAAAACCTGCTATAATAGCAAATACAGCTCCTATAGATAAAACATAAT\n-+\n-CCCFFFFFHHHHHJJJJJJJIJJJJJJJJJJIJJJJJJGIIJJJJIJIJJJ\n-@ERR1044075.3552 
HWI-ST790:248:H0G15ADXX:1:1101:10536:5030/2\n-CTGCTAAATGTAGAGAAAAAATAGCTAAATCAACTGAAGCTCCACCATGAG\n-+\n-B@CFFFFFHHFHHJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJIJIIJ\n-@ERR1044075.3658 HWI-ST790:248:H0G15ADXX:1:1101:18523:5017/2\n-CTC'..b'IIJJH\n-@ERR1044075.659985 HWI-ST790:248:H0G15ADXX:1:1106:11631:62123/2\n-CCACAGATTTATGAACATTGACCATAAAATAAACCCGGTCGATTAATAAAA\n-+\n-:1++2<A+<7,C?A>=ACA7<7@AAA7C?A<BACBAA@A<A0=B<?B77=>\n-@ERR1044075.660170 HWI-ST790:248:H0G15ADXX:1:1106:7215:62363/2\n-TGAAAATGGAGCTGGGACAGGATGAACTGTTTATCCACCTCTATCCGCTGG\n-+\n-@CCFFFFFHHHHHJJJIJJJJGGJIJJJIIJJJJJJJJJJIJGJJJJJJII\n-@ERR1044075.660255 HWI-ST790:248:H0G15ADXX:1:1106:12160:62309/2\n-AAAGAATCGGCCATCAATGATATTGAAGTTACGAATATTCAGATTTTAATA\n-+\n-@@@FDDDDDAHFHGGIEIEF@HGIJJIIGJIIGHEHIBG:??BHGGGEBDI\n-@ERR1044075.660247 HWI-ST790:248:H0G15ADXX:1:1106:11554:62387/2\n-TGTTGCTATAAAAAATGTTGATCCATAAATTGAGTCTGCAATAGTAAATGG\n-+\n-@CBFFFFFHHHHHJJJJIIJJJJJJJJJIJJJGGHIJJIJJGEGGIJJIIJ\n-@ERR1044075.660686 HWI-ST790:248:H0G15ADXX:1:1106:8134:62985/2\n-CTGGAATACCTCGACGTTATTCAGATTACCCAGATGCTTACACAACATGAA\n-+\n-@CCFFFFFHHHHHJJJIJJJJJJJJJJJJJJJJIJJJJJJJJJJJIJJJJJ\n-@ERR1044075.661147 HWI-ST790:248:H0G15ADXX:1:1106:8314:63385/2\n-AGAAGAATGAACTAAAGCAGAAACAGGTGTAGGAGCAGCTATAGCTGCAGG\n-+\n-@@@FFFFDHHGHGJIJJIIIJIIIJGI<AFEGHHHIIHGDGEGII@GEHIJ\n-@ERR1044075.661472 HWI-ST790:248:H0G15ADXX:1:1106:13982:63648/2\n-CTCCAATTAATGCTCCAGGATGTCCTAATTCAGCTCGAATTAAAATTCTTA\n-+\n-BCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJJJJJJJJJJIJJIJJJ\n-@ERR1044075.661879 HWI-ST790:248:H0G15ADXX:1:1106:7696:64091/2\n-GATTTATTGTATGAGCTCATCATATATTTACCGTTGGAATAGATGTAGATA\n-+\n-CCCFFFFFHHHHHJJJJJJIIJIJHJJJJHJIJIIJJGHJIG@DHDDHDGH\n-@ERR1044075.662140 HWI-ST790:248:H0G15ADXX:1:1106:8516:64305/2\n-TATGATTATGTTTTATATTATATGGATGAATTAATCATACACAACATATAT\n-+\n-@@@FDDFFHGDHHJJIEIJGHEIIJIJIJIJGHEHGIFHIACGIGAGGFDI\n-@ERR1044075.662813 
HWI-ST790:248:H0G15ADXX:1:1106:21170:64761/2\n-CTTTAATTCCTTATTCATCTTTTGCTCATATAGGAATTTTTCTGTCAGGAC\n-+\n-@@@FFFFD,2AC?GHBFIEGIE<3:<FEHGEGC+?E3C*@CE**:?DC??D\n-@ERR1044075.663073 HWI-ST790:248:H0G15ADXX:1:1106:5648:65498/2\n-TTTGTATTAGGTAATATTATTACTATTTTAACTGTATATCAATGATGACGA\n-+\n-@@BFFFFFHHHCFHIJJJJJJJJIJJJJJJJIJJHIJIJJJIJIEHDIIJI\n-@ERR1044075.663162 HWI-ST790:248:H0G15ADXX:1:1106:11759:65389/2\n-ATTGGAGTTAATTTAACATTTTTTCCTCAACATTTTTTAGGATTGGCTGGA\n-+\n-CCCFFFFEHHHHHJJJJJJJJJJJJJJJJJJIJJJJJJJJJGIJJIIJJJA\n-@ERR1044075.663390 HWI-ST790:248:H0G15ADXX:1:1106:10400:65584/2\n-ATTCGCCTGTTTAACAAAAACATGTCTTTTTGAATTATATATAAAGTCTAA\n-+\n-CCCFFFFFHHGHHJJJJJJJIJJJJJJJJJJJJIJIJJIJIIJJJHIHHIJ\n-@ERR1044075.663400 HWI-ST790:248:H0G15ADXX:1:1106:11192:65654/2\n-CCTCATCAGTAAATTGTGATATATAAAAATAATCAAACTACATCGACAAAA\n-+\n-C@CFFFDEHFFHHIIJFHHHIIIJJGJHGGHHIFHGIGEGFEDDGCFGIHF\n-@ERR1044075.664278 HWI-ST790:248:H0G15ADXX:1:1106:20222:66346/2\n-AATTATTTTACTATTTATTGCTCTTCCTTCTTTACGTTTACTTTATTTATT\n-+\n-<@@FFFFFHGFGFGHIJEHHEGEHECHGHEHGIGEHHHIDHIHIGIIJHIG\n-@ERR1044075.664837 HWI-ST790:248:H0G15ADXX:1:1106:12794:67053/2\n-ATTTCCTAAAAGAGTTAATAATAAATGTCCAGCAATTATATTAGCAGTTAA\n-+\n-CCCFFFFFHHHHGJJIJJJJIIJIJJJHGGGJIJIJJJIGJJJJJIIIIHI\n-@ERR1044075.665607 HWI-ST790:248:H0G15ADXX:1:1106:15165:67769/2\n-TTTTCATTTAGTGGATTATAGTCCATGACCATTAACAGGAGCTATCGGAGC\n-+\n-@@CFFFFFHHHCFHHIIIJJJJIJJIJJJJIJJJIIJJIIIIJIJGIJIJJ\n-@ERR1044075.665680 HWI-ST790:248:H0G15ADXX:1:1106:19772:67837/2\n-CATCTGGGTAATCTGAATAACGTCGAGGTATTCCAGCCAATCCTAAAAAAT\n-+\n-@C@FFFFFDHHHFIIHGFHIIIFHIHJJ:EGIJIGHFHIHJIICGIGHIIJ\n-@ERR1044075.665714 HWI-ST790:248:H0G15ADXX:1:1106:6180:68224/2\n-GGAACACCCGCTATTCTTATACCTTTTATAGTATGTATTGAAACTATTAGA\n-+\n-CCCFFFFFHHHHHJJJJJIIJIJJJJJJJJJIIIIFHIIDHHHIIIJIJJJ\n-@ERR1044075.665736 HWI-ST790:248:H0G15ADXX:1:1106:7267:68163/2\n-ATTTTATACCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATT\n-+\n-CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJJGHIIJIJIJIJJJIHGAGI\n-@ERR1044075.665878 
HWI-ST790:248:H0G15ADXX:1:1106:16309:68134/2\n-CGTGATACATCTCGTCATCATTGATATACAGTTAAAATAGTAATAATATTA\n-+\n-B@@DFFFFHHHGHJGIIIJJHHIJIIJJJIIIIJJJIIJIDEHGIJJIGJJ\n-@ERR1044075.666310 HWI-ST790:248:H0G15ADXX:1:1106:13497:68695/2\n-CTCATATTATTAGACAAGAATCAGGAAAAAAGGAAACTTTTGGTTCTCTAG\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJIJJJJJJJJJJJIIIIJJJ\n-@ERR1044075.666724 HWI-ST790:248:H0G15ADXX:1:1106:8127:69208/2\n-CATACTTATGCAGTAACTATTGGTTTACGATGAGGAATAATTCTATTTATT\n-+\n-CCCFFFFFHHHHHJJJJJJJJJJHJJJJJIJIHIJIJIJIIJ<FFIIIGIJ\n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp_class_fraction_counts.tabular
--- a/test-data/Samp_class_fraction_counts.tabular Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-Low_complexity 0.0
-Simple_repeat 98.0
-rRNA 772.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp_family_fraction_counts.tabular
--- a/test-data/Samp_family_fraction_counts.tabular Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-Low_complexity 0.0
-Simple_repeat 98.0
-rRNA 772.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/Samp_fraction_counts.tabular
--- a/test-data/Samp_fraction_counts.tabular Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-A-rich Low_complexity Low_complexity 0
-LSU-rRNA_Cel rRNA rRNA 772
-_ATA_n Simple_repeat Simple_repeat 0
-_ATTTAT_n Simple_repeat Simple_repeat 0
-_ATT_n Simple_repeat Simple_repeat 9
-_AT_n Simple_repeat Simple_repeat 0
-_CTAATT_n Simple_repeat Simple_repeat 7
-_TAT_n Simple_repeat Simple_repeat 0
-_TA_n Simple_repeat Simple_repeat 0
-_TTA_n Simple_repeat Simple_repeat 82
-_T_n Simple_repeat Simple_repeat 0
b
diff -r 89e05f831259 -r 530626b0757c test-data/aligned_353.tab
--- a/test-data/aligned_353.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-# Total reads aligned to repeated sequences
-15862067
b
diff -r 89e05f831259 -r 530626b0757c test-data/aligned_354.tab
--- a/test-data/aligned_354.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-# Total reads aligned to repeated sequences
-28421096
b
diff -r 89e05f831259 -r 530626b0757c test-data/aligned_355.tab
--- a/test-data/aligned_355.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-# Total reads aligned to repeated sequences
-10808170
b
diff -r 89e05f831259 -r 530626b0757c test-data/aligned_356.tab
--- a/test-data/aligned_356.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-# Total reads aligned to repeated sequences
-29256707
b
diff -r 89e05f831259 -r 530626b0757c test-data/aligned_reads.tab
--- a/test-data/aligned_reads.tab Mon Mar 18 09:39:44 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-# Total reads aligned to repeated sequences
-
-2510
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY-1-500k.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY-1-500k.fa Tue Apr 02 21:16:37 2024 +0000
b
b"@@ -0,0 +1,10001 @@\n+>chrY range=chrY:1-500000 5'pad=0 3'pad=0 strand=+ repeatMasking=none\n+GCGTTGTGAGCAACGACACTGAAAGCAACAATAATATTGTTAATTACTGC\n+TCAAAACTATAACACATTTAATCACTTACACAATTTACACACGCTTCCTG\n+CCTAGGAGTTGTGGGACCAGCTACCGATAATGCCAACAATACCGGGAAAC\n+GCGTCGCGGGACACCGATAACAATTTGGCGGTTAAAATGCTAGAACATCG\n+AGTACCAGACATGTTTTTCCTGCCGATATATCGAAGCCCAATAGCCCGCC\n+AACGCTCAAAACATAAATCGGAGTGTGAGACCAGATACCGAAGCCGTTAA\n+AAGCTTCGCGGTATATCGATAACAATTTGGCGGTAAAAATATTAGAACAT\n+CGAGTACCAACCATGTTTTTTTTTTCCTTCCGATTAATCGAAGTCAAACA\n+GCCCGCCAACGCTCCAAATTTAAATCGGAGTGTGAGACCAGCTACCGATG\n+CCGGTAAAAGCTTCGCGGGATATCGATAACAATCAGGTGGTTAAAAATAC\n+TATAACATCGAGTACCAGCCGTGTTTTTCCTGTCGATTTATCGAAGTCCA\n+ACAGCCCGCCTTCGCTCCGAACACAAATCGACAAGTCAGTAGCTTTTTTA\n+ACCCGGCAGTGGCGCCTGTGCAGCAGCTATTTAAAGTGAAAAACACGAAA\n+CGCAACAACAAAATGGACGGCCATAATGGAGACATAAATGAAGGATGGGC\n+AACAGTACTATCTATCTCGTCGGATGATAGTAACCAACTTTCGTCGCCGG\n+CGTCATTATAGTCTCATCGCTGGACACCACGCCAACGCCTGTCTAATGCA\n+AATCAACTTACATATTACTCTAAAATCTACTTATAACTGTCCCCTCTAAT\n+GATAAGCAATTTTTATGTTTACTTTAAACTTAAACCGATACCTTAAACTA\n+TGGCGCGCAAAATGACTACCAAGAAATCTAAAACTGTAAATTGTTGCAAA\n+CTATTTCTCCTAGTAATGTTAAATTTAAATGCGCGCATAATCGCAGCCAA\n+GTTAACCAAAAATGTCTCTCGTAAATGTAATTCCTAAATTAACACTTGTA\n+ACTAAGCGCGTATAAATACCGCAAATCCAAAAATGTAAATATGCCATAAA\n+TTGTTACTATATATTTTTTATAAAAAAATCAGAGTACAATAAAAATGCCA\n+GCGTCTATTAGGCGTTGAAAATTTAAAAAAAAAAAAAAAAAACTGAACTC\n+CCCACCCTGCTGAGACTAGAGGAAGAGGAGGCAGAGCTCAAAAGAATAAA\n+AAAACAGGAAGAGAGGGAAAGAAGGGAAAGGGAAAACCAAAAGTGGCCTC\n+CAGATAGGTGGTGTGAATTGGAAATAAACCGATATAATAAAAAATATAGA\n+AATGGCGATCTAACCAGGCAGGAAATTATAGAAAAATTCCGAGGGCAACC\n+ATTAAATGTACAACGAATAATCCTACCCGACTACGAAGGTGACTAAAAGT\n+AAATCAAAACAAACTAGGGTCGGAGCATAAAGCAGAAATANNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
N\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNN"..b'CAGATGAGGCTGACAAAAAATAGTACATTTTGAATTATTCAACAGA\n+CAACTAGACCGATTTATTTCCAGACATACCCAGTGGATGGCGTCTAAGAT\n+AACATTTCTTGTTAGTGATTTTAAAAACTGCAAGACACCACACCAAATGG\n+GACGCCAAAACTTATCTTACCAAAAAGCAGGAGAAAGATTAAAAAGGAAA\n+CTGGTGATCACGACAAAAGTCTTTTGATTCATGCAGCAACTGTTTTTGCT\n+AGAAAAGAATGTAATAGGGAACCTTTAAAAAAAACAGAATTTCCAAGCGA\n+ATCACCAGATGAAGCTCTAGCTTATCTCCTGAAAAACACGCTGACAAAAC\n+AGCAATATATAAGCACCAGGCTTTTAAATAAAAGCCATAACAGCGACATA\n+TATCCGCCATATAATGTAGTTATCGAAGGAAAATTACAGTGCCGACCAGA\n+AGTAATGGAAAACACTGCTCAAGTGCTATTAAGAAATCGCTTGGCTCATA\n+CAGCGCAAAGATTAATTAAGTTGCAATCTGATGTTTTCAAGCAATTTTCA\n+GATGTCTTTAAAATAAAATTAATTCGCAGCTATGGATTTGATGGGACAAC\n+TGGTAATAGTGCTAACAAGCAGAAATTTGAGACTGAAGCACTTGGCACAC\n+CAATTTCTGATCAATCTTTATTTGTAACTTCTGTAATACTTTATTTATGA\n+CAGTGGTCTACAGTTCTCGCTGCCATATGCCCGCAATAGTTCCTTGATAT\n+ATTGCGAATGGCCCAAAGTGATTTCTCCAAGGTCGCCATCTCGTTGCACC\n+TCCATGCCTAAGAACAAATGCAGTGGACCCTTGTCCGTGCACTCGAAAGA\n+CTCTGAAATCTTAGCTTTCAGATCTTCTCTTGACTGGCACGCTAGAATTA\n+AATCATCAACATATACTAAGATGAGCATCAGATTACCTTGACCACTTTGC\n+TGATAAAGACATGGTTCATGATTACAGGCCTTAAATCCCAAGTCTTTTAG\n+AACACCGTCGAGCTTGGAGTTCCACTCTCTGCCGGNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATAATCTTATTTTTG\n+AAGTATTTTATTTGATTTCAAATTATTACTAGTTTATTAATTTTTCCTTT\n+GTCGGAAAATTCAACGATTCTTTGATCCCAAGCTACATAAACTTCCTTCA\n+CCAATTACCGTTTCGGTTTTAAATACTTTTTGTTCACCAATGTATTTTTT\n+ATAATAGAATCTTTTAACGCAAGAGCATTTTTTCCCCAATCAAACACAAA\n+TTTCCTCATTTTCGGATTCTAAACCTTCTATTTCATAAAATAGATGTTTT\n+GTTTCAAATATTGTTAATATTTTATAATTGAGTATCCATAAACTGTAGAT\n+ACTCCTTTGAATGTGGGCAATTCTTTTTTATTTTCATAAATTCCCATTTA\n+GCAACTCTGGTGGGTTTAGAGAAAAGACTGTTTCTGAGATTGAACCGCTC\n+ATTGTTTTATTCGTTTCCTTGGCTTTTTCTTATGAGTATTTTTTTTATTT\n+ATGAGTAATGTCATTTTCAAGTTTACTTCGTTGTAGAATTCTGTAATACT\n+TAAACGTCCCTGTCCATGGTTGCATATTTCTGATTCGAGGATATTGAGTG\n+GTCGATTATCGTTACAAATAAAAATCAATTGCTTGTAAGTTTAAAACTTT\n+ACCATGGATAGTTAAAGCGTCATTACTGCCCCCATGATTTTATTACTTAG\n+AATTGTTCAAGCA
ATAAAATATTGTTCGCTTTTAATTTTATATTTCTTTT\n+CTGCTGCTTCTCTCCAGACTACATATTGGATCATTTCACCTGTGAACTTT\n+GGTAAAGATTTAACTACAGGTTAGTTTCGCATTTTATAGTTGGGTCAAGT\n+GCTCGGATTTTATAATCTTCCACTTTATTTCTGATAGCGTTTAACTGCCC\n+TTCTATCCCTTCAATTTGTCTGGTCACTTAACCCAATTGTACATTTATTA\n+ATCGGTTAACTAATTCCATTGTCAGATTTGTGGCGCTGGGTATCCCGCCC\n+GCAGAATAGGGTGGTGCTGAACCTCCATTTGCCATTGTTAAATTTAAATC\n+TTCAAAACTATTTAATAATCTTTTCAGATTGATTTCTTTAATTAATTGTT\n+AAAACTGATTTTGTAATGTTTAATTTTAAACTTCATAATATTCCCTGTAT\n+CTTCCTCTTATTTTAATGATTTCTTTTAATTCTTTCTTTCTTTCTTTTCT\n+TCAACCTCGGCGTGAAAATTTTCTTTAGAAACTTCTTGAACTTTCAATTA\n+TGTTTATTGTAAACAGCTTTCCGCTGAAATTGTTGTACTGCATTGTGGAA\n+TTGAAATTTGCTGTCTGATGCAGTTTGATTGAAGCGGAAGCTGGCAAAGG\n+TAATGACAAAAACGAAGACAAAGGAAATGCCGCCGAGATTCGAATTTGAA\n+ATTTGTTTATAAACTGAAATTTTAAATCGATTTATAAACTGGAGTAGAGT\n+GCTATGTTTTCGGGTTTTTAATTTGTCTGAATATTGCAGAAATTTTTGTT\n+CAGCAACAATAATGACTCGGCTATTGATCGTGATCAAGAAAATATATATT\n+TGACATGATCGGAAACGTTTTCGTCTGCCTTCCTTTCTCAACGAGTAAGG\n+GGTATAAATAGTGCTGCGTTTACATCTTAATAATCTATATACTTCCCGGA\n+GATCTCAGTTCTCATACGTGCAGAAACGGTTATAACCTTTTCAACGAACC\n+TTCTACTGCACGAGTAATAAATAGATTTCATAAATTTATTTTTTATTATT\n+TTTAGGATGGCCAAAACGTGGAGTCAATGAGACAATTTTTGAGTACTATG\n+TGGATGATAACGGAAACTGGCAACACTGGAGCACACGCGTTGAAGAATTT\n+CGATATCCAGAAGATGAAATTCCAGAGTTTTCATCTATTTTAGTTCCGAA\n+TGTAGATAATGTGCGCACTGCTTTCCTTTTACATAATATTGCTAAGCAGC\n+TCAAGCAAGTTCTTTTAATTGGTGAGCAGGGTACTGCTAAGACAGTGATG\n+ATAAAGGCCTATATGGGTCATTACGATCCCGAAGTTCACATTTTTAAATC\n+CTTTAATTTTTCATCCGCTACCACGCCTAACATGTACCAGGTAAAATCAA\n+TGCATTTTTTATATAATATGTATAATTAAATGTTTGCAATATGTGAAAGT\n+GAAAAAAATATAATTTTTATTTCAGAAGAACACCAACATATTACAATTAG\n+GTCTCAAACTGAACTCTGATAATTATTCTGATTTCATTGACGTTCAAGCC\n+TCGGTATCGAGCTTTTCTGATATGGGTTCGGATCATGAAGAGAAAAAACC\n+TTGCGTTTCTCTTGGATTCAAGTGCATTCGCTCTTGGATACAAGTGCTCT\n+TATATTCTTATAATTTCTTTATTGAAATCGATACTTTTGTTTTTCGGGAT\n+TTAAATTAGGGGACGGATGTTTAGTCTACCTGTGGGTGACTTATCTAGAG\n+TTGGGGCTTTTCCACTCTCGAGGATCATGTGACAATCTTATTCTTATCTG\n+CTATCTGTTGGGTTATATGCTTTATACAGAGTCAAACTAATTATTATTAT\n+GACAGAAGTTGTAATGCATATAATTTAGAAAAATATATAATGTTTTACAT\n
+GGGATTCTACCATTTCTCTCGTTTTTAAGTGATTTATTGGTTTTAAACGT\n+GTGACATTGTTGGTGATATAAATTGTTTGTTAAAGTCTGCTAGATTATTG\n+GAGATTAGGAATTCATATGTTGCTTAGATCGGCAGCTAAAAAAAATTCGG\n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY-1-500k.fa.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY-1-500k.fa.out Tue Apr 02 21:16:37 2024 +0000
b
b'@@ -0,0 +1,672 @@\n+  192  10.4  9.8  2.2  chrY            1     256 (3667096) +  TART_B1        LINE/Jockey            590  864 (9790) 120772\n+   88  11.5  9.7  3.3  chrY          270     413 (3666939) +  TART_B1        LINE/Jockey            712  864 (9790) 120772\n+  631  14.8  8.1  4.6  chrY          426    1187 (3666165) +  TART_B1        LINE/Jockey           9741 10528  (126) 120773\n+  286   3.3  0.0  0.0  chrY         1192    1490 (3665862) +  TART-A         LINE/Jockey           9873 10171 (5405) 120774\n+  448   7.4  0.0  0.5  chrY         8991    9409 (3657943) +  HETA           LINE/Jockey           5661 6077    (4) 120775\n+    7  12.2  0.0  0.0  chrY         9408    9448 (3657904) C  DNAREP1_DM     RC/Helitron          (217)  377    337 120776\n+   38  19.9 15.5  1.8  chrY         9447    9543 (3657809) C  DNAREP1_DM     RC/Helitron          (340)  254    145 120776\n+   19   5.9  0.0  0.0  chrY         9560    9593 (3657759) C  DNAREP1_DM     RC/Helitron          (551)   43     10 120777\n+  167  19.0 32.1  0.7  chrY        10936   11253 (3656099) C  DNAREP1_DM     RC/Helitron            (0)  594    178 120778\n+  523   2.1  0.0  0.0  chrY        11410   11833 (3655519) +  PROTOP_A       DNA/P                    1  424  (681) 120779\n+  312   2.5  0.0  0.0  chrY        11821   12098 (3655254) C  PROTOP_A       DNA/P                (645)  460    183 120780\n+   15  18.1  2.6  0.0  chrY        12233   12270 (3655082) +  (GCCTTT)n      Simple_repeat            1   39    (0) 120781\n+   14  26.9  1.5  4.5  chrY        12814   12881 (3654471) +  A-rich         Low_complexity           1   66    (0) 120782\n+  229  30.4  3.5  9.6  chrY        13410   14042 (3653310) +  Chouto_I-int   LTR/Gypsy              854 1370 (4345) 120783\n+   15  18.1  2.6  0.0  chrY        14385   14422 (3652930) +  (GCCTTT)n      Simple_repeat            1   39    (0) 120784\n+  339   2.7  0.9  0.0  chrY        14819   15147 (3652205) C  TC1-2_DM       DNA/TcMar-Tc1          
(0) 1644   1313 120785\n+  182  25.8  2.3  4.7  chrY        15489   15748 (3651604) +  Chouto_I-int   LTR/Gypsy             1117 1370 (4345) 120786\n+  316   2.2  0.0  0.0  chrY        16562   16839 (3650513) C  PROTOP_A       DNA/P                (645)  460    183 120787\n+   15  18.1  2.6  0.0  chrY        16974   17011 (3650341) +  (GCCTTT)n      Simple_repeat            1   39    (0) 120788\n+   14  28.5  1.5  3.0  chrY        17555   17621 (3649731) +  A-rich         Low_complexity           1   66    (0) 120789\n+   13  28.9  5.2  1.2  chrY        17622   17627 (3649725) +  (AACACA)n      Simple_repeat            5   80    (0) 120790\n+  357  26.2  5.4  5.4  chrY        18175   18691 (3648661) +  Chouto_I-int   LTR/Gypsy              854 1370 (4345) 120791\n+  717  34.9  4.9  4.8  chrY        19620   21544 (3645808) +  GTWIN_I-int    LTR/Gypsy             2720 4645 (1776) 120792\n+  404   2.8  0.7  0.2  chrY        22648   23056 (3644296) +  (TAATA)n       Simple_repeat            1  411    (0) 120793\n+  293   8.9  0.0  0.4  chrY        23057   23338 (3644014) C  Baggins1       LINE/LOA              (69) 5384   5104 120794\n+  855   5.7  5.7  0.0  chrY        23356   24131 (3643221) C  Baggins1       LINE/LOA             (730) 4723   3904 120794\n+   41  12.2 57.0  0.0  chrY        24134   24240 (3643112) C  Baggins1       LINE/LOA            (1656) 3797   3630 120794\n+   87  17.3 24.9  0.0  chrY        25318   25490 (3641862) +  DOC3_DM        LINE/Jockey           4195 4410  (330) 120795\n+   14  14.5  3.2  0.0  chrY        25604   25634 (3641718) +  (TTC)n         Simple_repeat            1   32    (0) 120796\n+  232  12.5 34.0  0.9  chrY        25758   26095 (3641257) C  DNAREP1_DM     RC/Helitron          (134)  460     12 120797\n+   34  14.3  0.0  0.0  chrY        29452   29507 (3637845) C  DNAREP1_DM     RC/Helitron            (0)  594    539 120798\n+   39  14.7  0.0  0.0  chrY        29529   29596 (3637756) C  DNAREP1_DM     RC/Helitron          
(176)  418    351 120798\n+   76  23.'..b'   4126 4250  (226) 121171\n+  318  17.3 14.7  4.3  chrY       475840  476261 (3191091) +  Gypsy_LTR      LTR/Gypsy                1  464   (18) 121172\n+  139  11.3 32.4  1.3  chrY       476281  476459 (3190893) +  MICROPIA_I-int LTR/Gypsy             4243 4476    (0) 121171\n+  277  14.2  8.6  8.6  chrY       476463  476846 (3190506) +  MICROPIA_LTR   LTR/Gypsy               93  476    (0) 121173\n+   63  24.3 20.9  2.4  chrY       476847  476928 (3190424) C  DNAREP1_DM     RC/Helitron          (478)  116     10 121174\n+   12  21.6  2.5  2.5  chrY       477485  477524 (3189828) +  A-rich         Low_complexity           1   40    (0) 121175\n+  401   7.1  0.3  3.0  chrY       478056  478432 (3188920) +  ROVER-LTR_DM   LTR/Gypsy                1  367    (0) 121176\n+  802   8.9  2.0  8.5  chrY       478432  479782 (3187570) +  ROVER-I_DM     LTR/Gypsy                1 1396 (5188) 121176\n+ 2679  12.1  0.9  0.0  chrY       479777  482273 (3185079) +  ROVER-I_DM     LTR/Gypsy             1776 4295 (2289) 121176\n+ 1510  12.8  0.7  0.9  chrY       482275  485215 (3182137) +  ROVER-I_DM     LTR/Gypsy             3532 6584    (0) 121177\n+  431  23.1 18.6  1.4  chrY       485353  485488 (3181864) C  QUASIMODO_I-int LTR/Gypsy            (223) 5837   5661 121178\n+   30   4.8  2.2  2.2  chrY       485489  485533 (3181819) +  (TTA)n         Simple_repeat            1   45    (0) 121179\n+  431  23.1 18.6  1.4  chrY       485534  486301 (3181051) C  QUASIMODO_I-int LTR/Gypsy            (400) 5660   4781 121178\n+   95  21.6  5.9  0.5  chrY       486311  486496 (3180856) C  QUASIMODO_I-int LTR/Gypsy           (1447) 4613   4418 121178\n+    6  33.1 22.4  0.6  chrY       486491  486633 (3180719) C  QUASIMODO_I-int LTR/Gypsy           (1826) 4234   4061 121178\n+   40  38.1  0.0  0.0  chrY       486609  486742 (3180610) +  FROGGER_I-int  LTR/Copia              662  795 (1282) 121180\n+  178  38.9 19.0  1.4  chrY       486794  487481 
(3179871) +  Copia1-I_DM    LTR/Copia             3239 4046   (78) 121181\n+  184  16.1 24.1  0.0  chrY       487537  487785 (3179567) C  DNAREP1_DM     RC/Helitron          (284)  310      2 121182\n+   14  18.6  8.2  0.0  chrY       487882  487930 (3179422) +  (TATAA)n       Simple_repeat            1   53    (0) 121183\n+   12  26.8  0.0  5.8  chrY       488005  488059 (3179293) +  A-rich         Low_complexity           1   52    (0) 121184\n+  403  13.4  9.2 10.3  chrY       492159  492758 (3174594) +  IDEFIX_LTR     LTR/Gypsy                1  594    (0) 121185\n+ 1507  14.8 11.1  1.2  chrY       492969  494609 (3172743) C  IDEFIX_I-int   LTR/Gypsy           (3461) 2167    365 121186\n+  363   5.7  1.8  0.0  chrY       494610  494996 (3172356) C  TRANSIB1       DNA/CMC-Transib        (0) 3014   2621 121187\n+   90   4.8  1.9  0.0  chrY       495001  495103 (3172249) C  TRANSIB1       DNA/CMC-Transib      (504) 2510   2406 121187\n+  225  21.8  4.3  1.6  chrY       495086  495388 (3171964) C  IDEFIX_I-int   LTR/Gypsy           (5316)  312      2 121186\n+  206   9.6  0.4 10.3  chrY       495783  496037 (3171315) C  IDEFIX_LTR     LTR/Gypsy             (72)  522    291 121188\n+  841   5.9 15.4  0.0  chrY       496013  496839 (3170513) +  TRANSIB2       DNA/CMC-Transib        514 1467 (1377) 121189\n+  406   0.9  2.7  0.0  chrY       496853  497185 (3170167) C  DM1731_I-int   LTR/Copia            (760) 3216   2875 121190\n+  127  32.9 11.5  1.2  chrY       497647  498067 (3169285) C  BURDOCK_I-int  LTR/Gypsy           (4378) 1337    631 121191\n+   15   7.5  0.0  6.7  chrY       498420  498451 (3168901) +  (TTTC)n        Simple_repeat            1   30    (0) 121192\n+   50  14.7 47.4  0.0  chrY       498763  498857 (3168495) C  DNAREP1_DM     RC/Helitron          (454)  140      1 121193\n+  265   8.5 17.8  0.1  chrY       499374  499971 (3167381) C  IDEFIX_LTR     LTR/Gypsy              (0)  594     99 121194\n+  269   5.2 19.8  0.9  chrY       499964  500407 
(3166945) +  DMCR1A         LINE/CR1               891 1418 (3052) 121195\n'
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY-500k.R1.fastqsanger.gz
b
Binary file test-data/chrY-500k.R1.fastqsanger.gz has changed
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY-500k.R2.fastqsanger.gz
b
Binary file test-data/chrY-500k.R2.fastqsanger.gz has changed
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY_paired_class_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY_paired_class_fraction_counts.tab Tue Apr 02 21:16:37 2024 +0000
b
@@ -0,0 +1,6 @@
+DNA 53.0
+LINE 197.0
+LTR 22125.0
+Low_complexity 0.0
+RC 0.0
+Simple_repeat 77.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY_paired_family_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY_paired_family_fraction_counts.tab Tue Apr 02 21:16:37 2024 +0000
b
@@ -0,0 +1,13 @@
+CMC-Transib 18.0
+CR1 0.0
+Copia 21226.0
+Gypsy 876.0
+Helitron 0.0
+Jockey 91.0
+LOA 0.0
+Low_complexity 0.0
+P 27.0
+Pao 23.0
+R1 106.0
+Simple_repeat 77.0
+TcMar-Tc1 8.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY_paired_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY_paired_fraction_counts.tab Tue Apr 02 21:16:37 2024 +0000
b
@@ -0,0 +1,145 @@
+A-rich Low_complexity Low_complexity 0.0
+ACCORD2_I-int LTR Gypsy 0.0
+ACCORD2_LTR LTR Gypsy 0.0
+ACCORD_I-int LTR Gypsy 0.0
+BARI1 DNA TcMar-Tc1 0.0
+BATUMI_LTR LTR Pao 0.0
+BS LINE Jockey 0.0
+BS2 LINE Jockey 50.0
+BURDOCK_I-int LTR Gypsy 0.0
+Baggins1 LINE LOA 0.0
+Bica_I-int LTR Gypsy 36.0
+Bica_LTR LTR Gypsy 1.0
+CIRCE LTR Gypsy 0.0
+Chouto_I-int LTR Gypsy 3.0
+Copia1-I_DM LTR Copia 0.0
+Copia_I-int LTR Copia 20956.0
+Copia_LTR LTR Copia 270.0
+DIVER2_I-int LTR Pao 0.0
+DIVER2_LTR LTR Pao 1.0
+DM1731_I-int LTR Copia 0.0
+DM1731_LTR LTR Copia 0.0
+DM176_I-int LTR Gypsy 0.0
+DM412 LTR Gypsy 2.0
+DM412B_LTR LTR Gypsy 0.0
+DMCR1A LINE CR1 0.0
+DMLTR5 LTR Gypsy 0.0
+DMRT1A LINE R1 0.0
+DMRT1B LINE R1 106.0
+DMRT1C LINE R1 0.0
+DNAREP1_DM RC Helitron 0.0
+DOC2_DM LINE Jockey 0.0
+DOC3_DM LINE Jockey 0.0
+FB4_DM DNA TcMar-Tc1 4.0
+FROGGER_I-int LTR Copia 0.0
+FW2_DM LINE Jockey 0.0
+G3_DM LINE Jockey 0.0
+G5A_DM LINE Jockey 0.0
+G5_DM LINE Jockey 0.0
+G6_DM LINE Jockey 0.0
+GA-rich Low_complexity Low_complexity 0.0
+GTWIN_I-int LTR Gypsy 9.0
+G_DM LINE Jockey 0.0
+Gypsy11_I-int LTR Gypsy 0.0
+Gypsy11_LTR LTR Gypsy 0.0
+Gypsy12_LTR LTR Gypsy 0.0
+Gypsy2-I_DM LTR Gypsy 0.0
+Gypsy2-LTR_DM LTR Gypsy 0.0
+Gypsy3_LTR LTR Gypsy 0.0
+Gypsy4_I-int LTR Gypsy 0.0
+Gypsy5_I-int LTR Gypsy 0.0
+Gypsy6A_LTR LTR Gypsy 1.0
+Gypsy6_I-int LTR Gypsy 26.0
+Gypsy8_I-int LTR Gypsy 0.0
+Gypsy8_LTR LTR Gypsy 0.0
+Gypsy9_I-int LTR Gypsy 0.0
+Gypsy_I-int LTR Gypsy 24.0
+Gypsy_LTR LTR Gypsy 0.0
+HELENA_RT LINE Jockey 0.0
+HETA LINE Jockey 24.0
+HMSBEAGLE_I-int LTR Gypsy 0.0
+IDEFIX_I-int LTR Gypsy 1.0
+IDEFIX_LTR LTR Gypsy 0.0
+Invader1_I-int LTR Gypsy 0.0
+Invader1_LTR LTR Gypsy 0.0
+Invader2_I-int LTR Gypsy 0.0
+Invader4_I-int LTR Gypsy 0.0
+Invader4_LTR LTR Gypsy 0.0
+Invader5_I-int LTR Gypsy 0.0
+Invader5_LTR LTR Gypsy 0.0
+Invader6_I-int LTR Gypsy 0.0
+Invader6_LTR LTR Gypsy 0.0
+MAX_I-int LTR Pao 22.0
+MAX_LTR LTR Pao 0.0
+MDG1_I-int LTR Gypsy 0.0
+MDG1_LTR LTR Gypsy 0.0
+MDG3_I-int LTR Gypsy 92.0
+MDG3_LTR LTR Gypsy 1.0
+MICROPIA_I-int LTR Gypsy 40.0
+MICROPIA_LTR LTR Gypsy 2.0
+Mariner2_DM DNA TcMar-Tc1 0.0
+NINJA_I-int LTR Pao 0.0
+NOMAD_I-int LTR Gypsy 0.0
+PROTOP_A DNA P 27.0
+PROTOP_B DNA P 0.0
+QUASIMODO2-I_DM LTR Gypsy 24.0
+QUASIMODO2-LTR_DM LTR Gypsy 0.0
+QUASIMODO_I-int LTR Gypsy 85.5
+QUASIMODO_LTR LTR Gypsy 14.5
+R1_DM LINE R1 0.0
+ROOA_I-int LTR Pao 0.0
+ROOA_LTR LTR Pao 0.0
+ROVER-I_DM LTR Gypsy 286.0
+ROVER-LTR_DM LTR Gypsy 2.0
+S2_DM DNA TcMar-Tc1 0.0
+STALKER4_I-int LTR Gypsy 133.0
+STALKER4_LTR LTR Gypsy 22.0
+S_DM DNA TcMar-Tc1 4.0
+Stalker2_I-int LTR Gypsy 69.0
+Stalker2_LTR LTR Gypsy 2.0
+TART-A LINE Jockey 4.0
+TART_B1 LINE Jockey 13.0
+TC1-2_DM DNA TcMar-Tc1 0.0
+TC1_DM DNA TcMar-Tc1 0.0
+TLD2 LTR Gypsy 0.0
+TRANSIB1 DNA CMC-Transib 0.0
+TRANSIB2 DNA CMC-Transib 18.0
+ZAM_I-int LTR Gypsy 0.0
+_AACACA_n Simple_repeat Simple_repeat 0.0
+_AAT_n Simple_repeat Simple_repeat 0.0
+_ACAATAG_n Simple_repeat Simple_repeat 0.0
+_ACC_n Simple_repeat Simple_repeat 0.0
+_AGAGAAG_n Simple_repeat Simple_repeat 2.5
+_AGAGA_n Simple_repeat Simple_repeat 33.5
+_ATAAT_n Simple_repeat Simple_repeat 0.0
+_ATATATT_n Simple_repeat Simple_repeat 0.0
+_ATATTAT_n Simple_repeat Simple_repeat 0.0
+_ATTTTT_n Simple_repeat Simple_repeat 0.0
+_ATT_n Simple_repeat Simple_repeat 0.0
+_AT_n Simple_repeat Simple_repeat 0.0
+_A_n Simple_repeat Simple_repeat 0.0
+_CATA_n Simple_repeat Simple_repeat 0.0
+_CTTTT_n Simple_repeat Simple_repeat 0.0
+_GAGAA_n Simple_repeat Simple_repeat 38.0
+_GCCTTT_n Simple_repeat Simple_repeat 3.0
+_TAATAT_n Simple_repeat Simple_repeat 0.0
+_TAATA_n Simple_repeat Simple_repeat 0.0
+_TATAAAA_n Simple_repeat Simple_repeat 0.0
+_TATAA_n Simple_repeat Simple_repeat 0.0
+_TATCATG_n Simple_repeat Simple_repeat 0.0
+_TA_n Simple_repeat Simple_repeat 0.0
+_TGTTG_n Simple_repeat Simple_repeat 0.0
+_TTATATA_n Simple_repeat Simple_repeat 0.0
+_TTATAT_n Simple_repeat Simple_repeat 0.0
+_TTATA_n Simple_repeat Simple_repeat 0.0
+_TTA_n Simple_repeat Simple_repeat 0.0
+_TTCTT_n Simple_repeat Simple_repeat 0.0
+_TTC_n Simple_repeat Simple_repeat 0.0
+_TTTAT_n Simple_repeat Simple_repeat 0.0
+_TTTA_n Simple_repeat Simple_repeat 0.0
+_TTTC_n Simple_repeat Simple_repeat 0.0
+_TTTGA_n Simple_repeat Simple_repeat 0.0
+_TTTTAG_n Simple_repeat Simple_repeat 0.0
+_TTTTCTT_n Simple_repeat Simple_repeat 0.0
+_TTTTC_n Simple_repeat Simple_repeat 0.0
+_T_n Simple_repeat Simple_repeat 0.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY_single_class_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY_single_class_fraction_counts.tab Tue Apr 02 21:16:37 2024 +0000
b
@@ -0,0 +1,6 @@
+DNA 72.0
+LINE 121.0
+LTR 13181.0
+Low_complexity 0.0
+RC 0.0
+Simple_repeat 89.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY_single_family_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY_single_family_fraction_counts.tab Tue Apr 02 21:16:37 2024 +0000
b
@@ -0,0 +1,13 @@
+CMC-Transib 12.0
+CR1 2.0
+Copia 12453.0
+Gypsy 701.0
+Helitron 0.0
+Jockey 51.0
+LOA 0.0
+Low_complexity 0.0
+P 27.0
+Pao 27.0
+R1 68.0
+Simple_repeat 89.0
+TcMar-Tc1 33.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/chrY_single_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrY_single_fraction_counts.tab Tue Apr 02 21:16:37 2024 +0000
b
@@ -0,0 +1,145 @@
+A-rich Low_complexity Low_complexity 0.0
+ACCORD2_I-int LTR Gypsy 0.0
+ACCORD2_LTR LTR Gypsy 0.0
+ACCORD_I-int LTR Gypsy 0.0
+BARI1 DNA TcMar-Tc1 0.0
+BATUMI_LTR LTR Pao 0.0
+BS LINE Jockey 0.0
+BS2 LINE Jockey 29.0
+BURDOCK_I-int LTR Gypsy 0.0
+Baggins1 LINE LOA 0.0
+Bica_I-int LTR Gypsy 25.0
+Bica_LTR LTR Gypsy 0.0
+CIRCE LTR Gypsy 0.0
+Chouto_I-int LTR Gypsy 2.0
+Copia1-I_DM LTR Copia 0.0
+Copia_I-int LTR Copia 12327.0
+Copia_LTR LTR Copia 123.0
+DIVER2_I-int LTR Pao 1.0
+DIVER2_LTR LTR Pao 1.0
+DM1731_I-int LTR Copia 3.0
+DM1731_LTR LTR Copia 0.0
+DM176_I-int LTR Gypsy 0.0
+DM412 LTR Gypsy 8.0
+DM412B_LTR LTR Gypsy 0.0
+DMCR1A LINE CR1 2.0
+DMLTR5 LTR Gypsy 0.0
+DMRT1A LINE R1 0.0
+DMRT1B LINE R1 68.0
+DMRT1C LINE R1 0.0
+DNAREP1_DM RC Helitron 0.0
+DOC2_DM LINE Jockey 0.0
+DOC3_DM LINE Jockey 0.0
+FB4_DM DNA TcMar-Tc1 12.0
+FROGGER_I-int LTR Copia 0.0
+FW2_DM LINE Jockey 0.0
+G3_DM LINE Jockey 0.0
+G5A_DM LINE Jockey 0.0
+G5_DM LINE Jockey 0.0
+G6_DM LINE Jockey 0.0
+GA-rich Low_complexity Low_complexity 0.0
+GTWIN_I-int LTR Gypsy 8.0
+G_DM LINE Jockey 0.0
+Gypsy11_I-int LTR Gypsy 0.0
+Gypsy11_LTR LTR Gypsy 0.0
+Gypsy12_LTR LTR Gypsy 0.0
+Gypsy2-I_DM LTR Gypsy 3.0
+Gypsy2-LTR_DM LTR Gypsy 0.0
+Gypsy3_LTR LTR Gypsy 0.0
+Gypsy4_I-int LTR Gypsy 0.0
+Gypsy5_I-int LTR Gypsy 0.0
+Gypsy6A_LTR LTR Gypsy 0.0
+Gypsy6_I-int LTR Gypsy 16.0
+Gypsy8_I-int LTR Gypsy 0.0
+Gypsy8_LTR LTR Gypsy 0.0
+Gypsy9_I-int LTR Gypsy 0.0
+Gypsy_I-int LTR Gypsy 16.0
+Gypsy_LTR LTR Gypsy 0.0
+HELENA_RT LINE Jockey 0.0
+HETA LINE Jockey 12.0
+HMSBEAGLE_I-int LTR Gypsy 1.0
+IDEFIX_I-int LTR Gypsy 3.0
+IDEFIX_LTR LTR Gypsy 1.0
+Invader1_I-int LTR Gypsy 0.0
+Invader1_LTR LTR Gypsy 0.0
+Invader2_I-int LTR Gypsy 0.0
+Invader4_I-int LTR Gypsy 0.0
+Invader4_LTR LTR Gypsy 0.0
+Invader5_I-int LTR Gypsy 0.0
+Invader5_LTR LTR Gypsy 0.0
+Invader6_I-int LTR Gypsy 0.0
+Invader6_LTR LTR Gypsy 0.0
+MAX_I-int LTR Pao 24.0
+MAX_LTR LTR Pao 1.0
+MDG1_I-int LTR Gypsy 0.0
+MDG1_LTR LTR Gypsy 0.0
+MDG3_I-int LTR Gypsy 67.0
+MDG3_LTR LTR Gypsy 2.0
+MICROPIA_I-int LTR Gypsy 45.0
+MICROPIA_LTR LTR Gypsy 2.0
+Mariner2_DM DNA TcMar-Tc1 0.0
+NINJA_I-int LTR Pao 0.0
+NOMAD_I-int LTR Gypsy 0.0
+PROTOP_A DNA P 27.0
+PROTOP_B DNA P 0.0
+QUASIMODO2-I_DM LTR Gypsy 19.0
+QUASIMODO2-LTR_DM LTR Gypsy 0.0
+QUASIMODO_I-int LTR Gypsy 94.0
+QUASIMODO_LTR LTR Gypsy 15.0
+R1_DM LINE R1 0.0
+ROOA_I-int LTR Pao 0.0
+ROOA_LTR LTR Pao 0.0
+ROVER-I_DM LTR Gypsy 203.0
+ROVER-LTR_DM LTR Gypsy 3.0
+S2_DM DNA TcMar-Tc1 0.0
+STALKER4_I-int LTR Gypsy 106.0
+STALKER4_LTR LTR Gypsy 22.0
+S_DM DNA TcMar-Tc1 21.0
+Stalker2_I-int LTR Gypsy 38.0
+Stalker2_LTR LTR Gypsy 2.0
+TART-A LINE Jockey 2.0
+TART_B1 LINE Jockey 8.0
+TC1-2_DM DNA TcMar-Tc1 0.0
+TC1_DM DNA TcMar-Tc1 0.0
+TLD2 LTR Gypsy 0.0
+TRANSIB1 DNA CMC-Transib 0.0
+TRANSIB2 DNA CMC-Transib 12.0
+ZAM_I-int LTR Gypsy 0.0
+_AACACA_n Simple_repeat Simple_repeat 0.0
+_AAT_n Simple_repeat Simple_repeat 0.0
+_ACAATAG_n Simple_repeat Simple_repeat 0.0
+_ACC_n Simple_repeat Simple_repeat 0.0
+_AGAGAAG_n Simple_repeat Simple_repeat 2.5
+_AGAGA_n Simple_repeat Simple_repeat 41.5
+_ATAAT_n Simple_repeat Simple_repeat 0.0
+_ATATATT_n Simple_repeat Simple_repeat 0.0
+_ATATTAT_n Simple_repeat Simple_repeat 0.0
+_ATTTTT_n Simple_repeat Simple_repeat 0.0
+_ATT_n Simple_repeat Simple_repeat 0.0
+_AT_n Simple_repeat Simple_repeat 0.0
+_A_n Simple_repeat Simple_repeat 0.0
+_CATA_n Simple_repeat Simple_repeat 0.0
+_CTTTT_n Simple_repeat Simple_repeat 0.0
+_GAGAA_n Simple_repeat Simple_repeat 44.0
+_GCCTTT_n Simple_repeat Simple_repeat 1.0
+_TAATAT_n Simple_repeat Simple_repeat 0.0
+_TAATA_n Simple_repeat Simple_repeat 0.0
+_TATAAAA_n Simple_repeat Simple_repeat 0.0
+_TATAA_n Simple_repeat Simple_repeat 0.0
+_TATCATG_n Simple_repeat Simple_repeat 0.0
+_TA_n Simple_repeat Simple_repeat 0.0
+_TGTTG_n Simple_repeat Simple_repeat 0.0
+_TTATATA_n Simple_repeat Simple_repeat 0.0
+_TTATAT_n Simple_repeat Simple_repeat 0.0
+_TTATA_n Simple_repeat Simple_repeat 0.0
+_TTA_n Simple_repeat Simple_repeat 0.0
+_TTCTT_n Simple_repeat Simple_repeat 0.0
+_TTC_n Simple_repeat Simple_repeat 0.0
+_TTTAT_n Simple_repeat Simple_repeat 0.0
+_TTTA_n Simple_repeat Simple_repeat 0.0
+_TTTC_n Simple_repeat Simple_repeat 0.0
+_TTTGA_n Simple_repeat Simple_repeat 0.0
+_TTTTAG_n Simple_repeat Simple_repeat 0.0
+_TTTTCTT_n Simple_repeat Simple_repeat 0.0
+_TTTTC_n Simple_repeat Simple_repeat 0.0
+_T_n Simple_repeat Simple_repeat 0.0
b
diff -r 89e05f831259 -r 530626b0757c test-data/edgeR_plots.pdf
b
Binary file test-data/edgeR_plots.pdf has changed
b
diff -r 89e05f831259 -r 530626b0757c test-data/edgeR_result_file.tab
--- a/test-data/edgeR_result_file.tab Mon Mar 18 09:39:44 2024 +0000
+++ b/test-data/edgeR_result_file.tab Tue Apr 02 21:16:37 2024 +0000
b
b'@@ -1,211 +1,211 @@\n TE_item\tlog2FC\tFDR\tClass\tType\n-DM297_I-int\t 2.69047105\t4.4945e-21\tLTR\tGypsy\n-DM297_LTR\t 1.74961921\t1.3397e-18\tLTR\tGypsy\n-Gypsy1-I_DM\t 2.37388633\t4.4803e-16\tLTR\tGypsy\n-NOF_FB\t-2.79644759\t4.4803e-16\tDNA\tMULE-NOF\n-G7_DM\t 3.25987375\t1.5530e-15\tLINE\tJockey\n-Gypsy_I-int\t 1.74702180\t4.3525e-12\tLTR\tGypsy\n-LINEJ1_DM\t 1.94429218\t5.1807e-12\tLINE\tJockey\n-TOM_I-int\t 1.33935222\t6.6923e-12\tLTR\tGypsy\n-GTWIN_LTR\t 1.89490932\t8.8059e-12\tLTR\tGypsy\n-Gypsy_LTR\t 1.56024436\t2.9985e-11\tLTR\tGypsy\n-DM1731_I-int\t 1.53134430\t6.4921e-10\tLTR\tCopia\n-Gypsy6_LTR\t 1.53023046\t1.0751e-06\tLTR\tGypsy\n-LOOPER1_DM\t-2.46983670\t1.6192e-06\tDNA\tPiggyBac\n-TIRANT_LTR\t 1.12203469\t1.7801e-06\tLTR\tGypsy\n-Gypsy12_I-int\t-1.12204057\t2.1949e-06\tLTR\tGypsy\n-R2_DM\t 1.67889583\t2.7304e-06\tLINE\tR2\n-TRANSPAC_LTR\t 1.49364968\t2.9021e-06\tLTR\tGypsy\n-Copia_LTR\t 1.66649088\t1.1860e-05\tLTR\tCopia\n-ACCORD_LTR\t 1.37240595\t1.7595e-05\tLTR\tGypsy\n-TIRANT_I-int\t 1.17728139\t2.0469e-05\tLTR\tGypsy\n-TART_B1\t-1.14695346\t3.0966e-05\tLINE\tJockey\n-TRANSIB1\t-1.39021194\t6.1650e-05\tDNA\tCMC-Transib\n-DM412B_LTR\t 0.80309005\t1.2443e-04\tLTR\tGypsy\n-MICROPIA_LTR\t-1.29556276\t1.2985e-04\tLTR\tGypsy\n-G6_DM\t-1.05894869\t1.3831e-04\tLINE\tJockey\n-NINJA_I-int\t-0.87426452\t1.5554e-04\tLTR\tPao\n-R1_DM\t 0.95590515\t1.8985e-04\tLINE\tR1\n-Invader6_LTR\t-1.18865109\t2.1299e-04\tLTR\tGypsy\n-Copia_I-int\t 1.93316905\t2.2313e-04\tLTR\tCopia\n-BLASTOPIA_LTR\t 1.34881959\t3.6488e-04\tLTR\tGypsy\n-LSU-rRNA_Hsa\t 2.61189118\t8.0160e-04\trRNA\trRNA\n-TRANSPAC_I-int\t 1.16122218\t8.4531e-04\tLTR\tGypsy\n-G4_DM\t-0.74970144\t9.1437e-04\tLINE\tJockey\n-SSU-rRNA_Hsa\t 1.12389968\t1.0426e-03\trRNA\trRNA\n-DOC5_DM\t-0.80987287\t1.6241e-03\tLINE\tJockey\n-Invader1_I-int\t 0.74972783\t1.9892e-03\tLTR\tGypsy\n-TRANSIB4\t-1.00230688\t2.0605e-03\tDNA\tCMC-Transib\n-MICROPIA_I-int\t-0.75119303\t2.9293e-03\tLTR\tGypsy\n-PLACW_DM\t 
1.16310826\t3.0518e-03\tDNA\tP\n-TABOR_I-int\t 0.73770219\t3.5495e-03\tLTR\tGypsy\n-I_DM\t 1.04141382\t4.5715e-03\tLINE\tI\n-Gypsy3_I-int\t 0.58659480\t5.3315e-03\tLTR\tGypsy\n-DOC\t 0.94015336\t5.5212e-03\tLINE\tJockey\n-Helitron1_DM\t-5.10401049\t6.3658e-03\tRC\tHelitron\n-Invader5_I-int\t-0.97342264\t6.6603e-03\tLTR\tGypsy\n-ACCORD_I-int\t 0.59304769\t7.0136e-03\tLTR\tGypsy\n-LSU-rRNA_Dme\t 0.90220479\t1.0210e-02\trRNA\trRNA\n-DM412\t 0.77597081\t1.0808e-02\tLTR\tGypsy\n-NINJA_LTR\t-0.92340817\t1.0956e-02\tLTR\tPao\n-BATUMI_I-int\t-0.56286761\t1.4855e-02\tLTR\tPao\n-DMRT1C\t-0.66474417\t1.7456e-02\tLINE\tR1\n-TAHRE\t-0.50950303\t1.9854e-02\tLINE\tJockey\n-Gypsy8_I-int\t-0.57827690\t2.4898e-02\tLTR\tGypsy\n-BLASTOPIA_I-int\t 0.79698775\t3.0182e-02\tLTR\tGypsy\n-DM176_LTR\t-0.69138467\t4.3254e-02\tLTR\tGypsy\n-DMRT1A\t-0.50960841\t4.3254e-02\tLINE\tR1\n-Gypsy11_I-int\t-0.73463462\t4.3254e-02\tLTR\tGypsy\n-QUASIMODO2-I_DM\t 0.47005794\t4.4414e-02\tLTR\tGypsy\n-MuDR-1_DEl\t 1.35524443\t5.3232e-02\tDNA\tMULE-NOF\n-DM1731_LTR\t 0.75223818\t5.4013e-02\tLTR\tCopia\n-FROGGER_I-int\t-0.60531971\t5.4013e-02\tLTR\tCopia\n-MAX_I-int\t-0.51645609\t6.4340e-02\tLTR\tPao\n-BURDOCK_I-int\t 0.41822333\t6.9121e-02\tLTR\tGypsy\n-FROGGER_LTR\t 0.88832640\t7.0636e-02\tLTR\tCopia\n-DIVER_I-int\t 0.57905223\t7.3816e-02\tLTR\tPao\n-Chouto_I-int\t-0.55102999\t8.1512e-02\tLTR\tGypsy\n-DM176_I-int\t 0.38700877\t9.1864e-02\tLTR\tGypsy\n-Invader4_I-int\t 0.53274705\t9.4896e-02\tLTR\tGypsy\n-G3_DM\t-0.60764816\t1.0316e-01\tLINE\tJockey\n-Gypsy4_LTR\t 0.67676363\t1.0531e-01\tLTR\tGypsy\n-BATUMI_LTR\t-0.62559888\t1.1233e-01\tLTR\tPao\n-Gypsy2-I_DM\t 0.41886069\t1.2754e-01\tLTR\tGypsy\n-Invader3_LTR\t-0.55372613\t1.3023e-01\tLTR\tGypsy\n-MDG3_I-int\t 0.41719543\t1.3084e-01\tLTR\tGypsy\n-SSU-rRNA_Dme\t 
0.50182621\t1.3084e-01\trRNA\trRNA\n-G5A_DM\t-0.46837522\t1.4239e-01\tLINE\tJockey\n-S2_DM\t-0.57644415\t1.5438e-01\tDNA\tTcMar-Tc1\n-Stalker2_I-int\t-0.42481708\t1.5792e-01\tLTR\tGypsy\n-BLOOD_LTR\t 0.49499975\t1.5864e-01\tLTR\tGypsy\n-Invader5_LTR\t-0.67598409\t1.5864e-01\tLTR\tGypsy\n-R1-2_DM\t-0.61185323\t1.5864e-01\tLINE\tR1\n-Transib5\t-0.51646991\t1.5864e-01\tDNA\tCMC-Transib\n-TLD2\t-1.24823549\t1.5867e-01\tLTR\tGypsy\n-TC1_DM\t 0.40057291\t1.6763e-01\tDNA\tTcMar-Tc1\n-BS2\t-0.33138649\t1.8548e-01\tLINE\tJockey\n-ROVER-LTR_DM\t-0.52968179\t1.8874e-01\tLTR\tGypsy\n-ACCORD2_I-int\t-0.35057261\t1.9179e-01\tLTR\tGypsy\n-Gypsy12A_LTR\t-0.354465'..b'.2455e-01\tLTR\tGypsy\n+Chouto_LTR\t-0.4338500\t4.3619e-01\tLTR\tGypsy\n+Chimpo_I-int\t-0.3314691\t4.5127e-01\tLTR\tGypsy\n+NOMAD_LTR\t-0.3209878\t4.5127e-01\tLTR\tGypsy\n+DIVER2_I-int\t-0.2289981\t4.5378e-01\tLTR\tPao\n+DOC3_DM\t-0.2381738\t4.6852e-01\tLINE\tJockey\n+Gypsy2_LTR\t 0.3263404\t4.9330e-01\tLTR\tGypsy\n+S_DM\t 0.1820634\t4.9593e-01\tDNA\tTcMar-Tc1\n+Gypsy6_I-int\t 0.2334889\t5.0030e-01\tLTR\tGypsy\n+Invader6_I-int\t-0.2164593\t5.0656e-01\tLTR\tGypsy\n+ROOA_I-int\t-0.2055700\t5.0656e-01\tLTR\tPao\n+BURDOCK_LTR\t 0.3163693\t5.2334e-01\tLTR\tGypsy\n+MAX_LTR\t-0.2697259\t5.2334e-01\tLTR\tPao\n+ROO_I-int\t 0.2651464\t5.2334e-01\tLTR\tPao\n+XDMR\t 0.2389322\t5.2334e-01\tUnknown\tUnknown\n+BEL_I-int\t 0.2260271\t5.4960e-01\tLTR\tPao\n+Gypsy10_I-int\t-0.2003926\t5.4960e-01\tLTR\tGypsy\n+Gypsy11_LTR\t-0.3153869\t5.4960e-01\tLTR\tGypsy\n+Gypsy8_LTR\t-0.2405647\t5.4960e-01\tLTR\tGypsy\n+Transib-N1_DM\t-0.2959361\t5.4960e-01\tDNA\tCMC-Transib\n+DIVER_LTR\t 0.2596569\t5.5142e-01\tLTR\tPao\n+DOC6_DM\t 0.2109295\t5.5744e-01\tLINE\tJockey\n+GTWIN_I-int\t 
0.1824941\t5.5998e-01\tLTR\tGypsy\n+MDG1_LTR\t-0.2338760\t5.6341e-01\tLTR\tGypsy\n+ARS406_DM\t-0.2643500\t5.7901e-01\tUnknown\tUnknown\n+Invader3_I-int\t-0.1497623\t5.7901e-01\tLTR\tGypsy\n+TRANSIB2\t-0.1574346\t5.7901e-01\tDNA\tCMC-Transib\n+Gypsy9_LTR\t-0.3531528\t6.2010e-01\tLTR\tGypsy\n+Invader2_I-int\t-0.1629423\t6.2495e-01\tLTR\tGypsy\n+Jockey2\t-0.1743865\t6.2495e-01\tLINE\tJockey\n+MDG3_LTR\t-0.2003640\t6.2495e-01\tLTR\tGypsy\n+POGO\t-0.1617454\t6.2495e-01\tDNA\tTcMar-Pogo\n+FW2_DM\t 0.1701777\t6.5086e-01\tLINE\tJockey\n+Baggins1\t-0.1467398\t6.5429e-01\tLINE\tLOA\n+PROTOP_A\t 0.1182439\t6.7517e-01\tDNA\tP\n+TART-A\t 0.1256629\t6.7517e-01\tLINE\tJockey\n+Stalker3_LTR\t-0.1859830\t6.8046e-01\tLTR\tGypsy\n+Bica_LTR\t-0.2144653\t6.8233e-01\tLTR\tGypsy\n+NTS_DM\t-0.1893877\t6.8404e-01\tOther\tOther\n+G5_DM\t-0.1234155\t6.8736e-01\tLINE\tJockey\n+TABOR_LTR\t-0.2553509\t6.8736e-01\tLTR\tGypsy\n+FB4_DM\t-0.1257960\t7.3366e-01\tDNA\tTcMar-Tc1\n+HMSBEAGLE_I-int\t 0.1271561\t7.3366e-01\tLTR\tGypsy\n+DMRT1B\t-0.1232374\t7.3586e-01\tLINE\tR1\n+NOMAD_I-int\t-0.1496694\t7.3586e-01\tLTR\tGypsy\n+ZAM_LTR\t 0.1743412\t7.3586e-01\tLTR\tGypsy\n+Gypsy10_LTR\t-0.1653357\t7.4754e-01\tLTR\tGypsy\n+LmeSINE1c\t-0.2373454\t7.4754e-01\tSINE\ttRNA-Deu-L2\n+QUASIMODO2-LTR_DM\t-0.1678274\t7.4754e-01\tLTR\tGypsy\n+BEL_LTR\t 0.1413744\t7.6869e-01\tLTR\tPao\n+QUASIMODO_I-int\t 0.1147817\t7.6929e-01\tLTR\tGypsy\n+Invader1_LTR\t 0.1285521\t7.7711e-01\tLTR\tGypsy\n+IVK_DM\t-0.0922152\t7.8193e-01\tLINE\tI\n+BS4_DM\t-0.2651727\t7.9393e-01\tLINE\tJockey\n+Copia2_LTR_DM\t 0.1351925\t7.9393e-01\tLTR\tCopia\n+Gypsy12_LTR\t-0.0944692\t7.9977e-01\tLTR\tGypsy\n+Gypsy5_I-int\t-0.0889119\t8.0188e-01\tLTR\tGypsy\n+Gypsy6A_LTR\t-0.1275535\t8.1055e-01\tLTR\tGypsy\n+PROTOP_B\t-0.0857822\t8.1067e-01\tDNA\tP\n+Copia1-LTR_DM\t-0.1081470\t8.2869e-01\tLTR\tCopia\n+G_DM\t 0.0788783\t8.3480e-01\tLINE\tJockey\n+LSU-rRNA_Cel\t-0.1045374\t8.4508e-01\trRNA\trRNA\n+IDEFIX_I-int\t 
0.0667804\t8.6328e-01\tLTR\tGypsy\n+DMRP1\t 0.0889793\t8.7188e-01\tUnknown\tUnknown\n+Gypsy4_I-int\t-0.0632668\t8.8199e-01\tLTR\tGypsy\n+QUASIMODO_LTR\t-0.0758141\t8.8199e-01\tLTR\tGypsy\n+Gypsy5_LTR\t-0.0832238\t8.8416e-01\tLTR\tGypsy\n+Gypsy7_I-int\t 0.0709106\t8.8416e-01\tLTR\tGypsy\n+TC1-2_DM\t 0.0485761\t8.8416e-01\tDNA\tTcMar-Tc1\n+ROVER-I_DM\t-0.0523539\t8.8501e-01\tLTR\tGypsy\n+Stalker2_LTR\t-0.0636566\t8.8501e-01\tLTR\tGypsy\n+MDG1_I-int\t-0.0527394\t9.0541e-01\tLTR\tGypsy\n+DMLTR5\t 0.0615033\t9.2259e-01\tLTR\tGypsy\n+Gypsy2_I-int\t-0.0370213\t9.2289e-01\tLTR\tGypsy\n+Gypsy2-LTR_DM\t 0.0531394\t9.2289e-01\tLTR\tGypsy\n+ALA_DM\t-0.1255824\t9.2725e-01\tUnknown\tUnknown\n+Gypsy3_LTR\t-0.0416825\t9.5360e-01\tLTR\tGypsy\n+M4DM\t-0.0286022\t9.5845e-01\tDNA\tCMC-Transib\n+DNAREP1_DM\t 0.0280918\t9.6629e-01\tRC\tHelitron\n+BARI_DM\t 0.0258962\t9.7189e-01\tDNA\tTcMar-Tc1\n+DMTOM1_LTR\t 0.0238942\t9.7754e-01\tLTR\tGypsy\n+MINOS\t 0.0226472\t9.8823e-01\tDNA\tTcMar-Tc1\n+STALKER4_I-int\t 0.0128051\t9.8873e-01\tLTR\tGypsy\n+G2_DM\t 0.0110450\t9.9427e-01\tLINE\tJockey\n+IDEFIX_LTR\t 0.0103925\t9.9427e-01\tLTR\tGypsy\n+ROOA_LTR\t-0.0111583\t9.9548e-01\tLTR\tPao\n+BS\t-0.0065436\t9.9758e-01\tLINE\tJockey\n+Invader4_LTR\t 0.0049712\t9.9758e-01\tLTR\tGypsy\n+Copia1-I_DM\t 0.0012652\t1.0000e+00\tLTR\tCopia\n+FTZ_DM\t 0.0000000\t1.0000e+00\tUnknown\tUnknown\n+FUSHI_DM\t 0.0000000\t1.0000e+00\tUnknown\tUnknown\n'