annotate RepEnrich2.py @ 7:61e0404f0d76 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
author artbio
date Tue, 23 Apr 2024 21:44:44 +0000
parents 388a47ca4199
children 567549a49eb2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
1 import argparse
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
2 import csv
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
3 import os
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
4 import shlex
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
5 import subprocess
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
6 import sys
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
7 from collections import defaultdict
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
8 from concurrent.futures import ProcessPoolExecutor
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
9
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
10
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
# Command-line interface. Long texts are written as adjacent string
# literals so that wrapped words are always separated by exactly one space,
# whatever the indentation of the source.
parser = argparse.ArgumentParser(
    description='Repenrich aligns reads to Repeat Elements pseudogenomes '
                'and counts aligned reads. RepEnrich_setup must be run '
                'before its use')
parser.add_argument('--annotation_file', action='store',
                    metavar='annotation_file',
                    help='RepeatMasker.org annotation file for your '
                         'organism. The file may be downloaded from '
                         'RepeatMasker.org. E.g. hg19_repeatmasker.txt')
parser.add_argument('--alignment_bam', action='store',
                    metavar='alignment_bam',
                    help='Bam alignments of unique mapper reads.')
parser.add_argument('--fastqfile', action='store', metavar='fastqfile',
                    help='File of fastq reads mapping to multiple '
                         'locations. Example: /data/multimap.fastq')
parser.add_argument('--fastqfile2', action='store', dest='fastqfile2',
                    metavar='fastqfile2', default='',
                    help='fastqfile #2 when using paired-end option. '
                         'Default none')
parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus',
                    default=1, type=int,
                    help='Number of CPUs. The more cpus the '
                         'faster RepEnrich performs. Default: "1"')

args = parser.parse_args()

# parameters
annotation_file = args.annotation_file
unique_mapper_bam = args.alignment_bam
fastqfile_1 = args.fastqfile
fastqfile_2 = args.fastqfile2
cpus = args.cpus
# Change if simple repeats are differently annotated in your organism
simple_repeat = "Simple_repeat"
# a second fastq file switches the script to paired-end mode
paired_end = bool(args.fastqfile2)

# check that the programs we need are available
# Only the ability to launch each executable is tested: a missing program
# raises OSError, exit codes are ignored. subprocess.DEVNULL discards the
# output without leaving file handles open.
try:
    for probe in ("coverageBed -h", "bowtie2 --version"):
        subprocess.call(shlex.split(probe),
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL)
except OSError:
    print("Error: Bowtie2 or bedtools not loaded")
    raise
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
60
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
61
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
def starts_with_numerical(list):
    '''
    Tell whether the first item of a sequence can be converted by int().

    Returns False for an empty sequence or when int() rejects the first
    item with a ValueError, True otherwise.
    '''
    if len(list) == 0:
        return False
    try:
        int(list[0])
    except ValueError:
        return False
    else:
        return True
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
70
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
71
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
72 # define a text importer for .out/.txt format of repbase
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
def import_text(filename, separator):
    '''
    Read a delimited text file and return its numeric-leading rows.

    filename: path of the file to read.
    separator: one-character field delimiter given to csv.reader;
               whitespace following a delimiter is skipped.

    Returns a list of rows (lists of strings), keeping only the rows
    accepted by starts_with_numerical().
    '''
    # lift the csv per-field size limit so very long fields do not fail
    csv.field_size_limit(sys.maxsize)
    # the context manager closes the file once every row has been read;
    # newline='' is the mode the csv module recommends for its input files
    with open(filename, newline='') as handle:
        rows = csv.reader(handle, delimiter=separator,
                          skipinitialspace=True)
        return [row for row in rows if starts_with_numerical(row)]
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
78
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
79
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
# set a reference repeat list for the script
# Field index 9 of every retained annotation row gives the repeat name;
# '(', ')' and '/' are replaced by '_'. Names are deduplicated and sorted.
_name_table = str.maketrans('()/', '___')
repeat_list = sorted({listline[9].translate(_name_table)
                      for listline in import_text(annotation_file, ' ')})

# unique mapper counting
# The pipeline runs through a shell, so the BAM path is quoted to survive
# spaces and shell metacharacters.
cmd = (f"bedtools bamtobed -i {shlex.quote(unique_mapper_bam)} | "
       "bedtools coverage -b stdin -a repnames.bed")
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
bedtools_counts = p.communicate()[0].decode().rstrip('\r\n').split('\n')

# parse bedtools output
counts = defaultdict(int)  # key: repeat names, value: unique mapper counts
sumofrepeatreads = 0
for line in bedtools_counts:
    if not line:
        # an empty bedtools output is split into a single '' entry
        continue
    fields = line.split('\t')
    # fields[3] is used as the repeat name, fields[4] as its read count
    counts[fields[3]] += int(fields[4])
    sumofrepeatreads += int(fields[4])
print(f"Identified {sumofrepeatreads} unique reads that mapped to repeats.")

# print unique mapper counts
with open("unique_mapper_counts.tsv", 'w') as fout:
    fout.write("#element\tcount\n")
    for element in sorted(counts):
        fout.write(f"{element}\t{counts[element]}\n")
08e50af788f7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit b1761de76fd068b86a06d88e70c1ba1d8644e7b5
artbio
parents: 0
diff changeset
106
7
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
107
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
def run_bowtie(args):
    '''
    Run bowtie2 for one (metagenome, fastqfile) pair and append the read
    identifiers found in its output to the file "<metagenome>.reads".

    args: tuple (metagenome, fastqfile). metagenome is given to bowtie2
          as the index (-x) and also names the output file; fastqfile is
          the file of reads to align.

    write to files to save memory

    Returns None. Raises subprocess.CalledProcessError when bowtie2 exits
    with a non-zero status.
    '''
    metagenome, fastqfile = args
    b_opt = "-k 1 -p 1 --quiet --no-hd"
    # NOTE(review): per the Bowtie 2 manual, reads that fail to align are
    # also written to the SAM output unless --no-unal is given; confirm
    # that recording them here is intended.
    # The argument list is built directly so that paths containing spaces
    # or quotes reach bowtie2 unchanged.
    command = ["bowtie2", *shlex.split(b_opt), "-x", metagenome, fastqfile]
    bowtie_align = subprocess.run(command, check=True,
                                  capture_output=True, text=True).stdout
    # append mode: the same .reads file may receive several fastq files
    with open(f"{metagenome}.reads", "a+") as readfile:
        for line in bowtie_align.split('\n'):
            fields = line.split()
            if not fields:
                # blank line (e.g. bowtie2 printed nothing): no read name
                continue
            # first column, cut at the first "/" (drops any "/1", "/2")
            read = fields[0].split("/")[0]
            if read:
                readfile.write(f"{read}\n")
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
123
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
124
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
125 # multimapper parsing
7
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
126 args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list]
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
127 if paired_end:
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
128 args_list.extend([(metagenome, fastqfile_2) for
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
129 metagenome in repeat_list])
7
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
130 with ProcessPoolExecutor(max_workers=cpus) as executor:
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
131 results = executor.map(run_bowtie, args_list)
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
132
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
133 # Aggregate results (avoiding race conditions)
7
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
134 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
135
7
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
136 # Now we read .reads file to populate metagnomes_reads
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
137 for metagenome in repeat_list:
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
138 with open(f"{metagenome}.reads") as readfile:
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
139 for read in readfile:
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
140 metagenome_reads[metagenome].append(read)
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
141 # read are only once in list
61e0404f0d76 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents: 6
diff changeset
142 metagenome_reads[metagenome] = list(set(metagenome_reads[metagenome]))
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
143
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
144 # build repeats_by_reads by inverting the metagenome_reads dictionary
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
145 repeats_by_reads = defaultdict(list) # readids: list of repeats names
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
146 for repname in metagenome_reads:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
147 for read in metagenome_reads[repname]:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
148 repeats_by_reads[read].append(repname)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
149 for repname in repeats_by_reads:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
150 repeats_by_reads[repname] = list(set(repeats_by_reads[repname]))
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
151
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
152 # 3 dictionaries and 1 counter variable to be populated
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
153 fractionalcounts = defaultdict(float)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
154 familyfractionalcounts = defaultdict(float)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
155 classfractionalcounts = defaultdict(float)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
156 sumofrepeatreads = 0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
157
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
158 # Update counts dictionary with sets of repeats (was "subfamilies")
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
159 # matched by multimappers
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
160 for repeat_set in repeats_by_reads.values():
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
161 repeat_set_string = ','.join(repeat_set)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
162 counts[repeat_set_string] += 1
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
163 sumofrepeatreads += 1
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
164
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
165 print(f'Identified more {sumofrepeatreads} mutimapper repeat reads')
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
166
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
167 # Populate fractionalcounts
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
168 for key, count in counts.items():
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
169 key_list = key.split(',')
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
170 for i in key_list:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
171 fractionalcounts[i] += count / len(key_list)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
172
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
173 # build repeat_ref for easy access to rep class and rep families
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
174 repeat_ref = defaultdict(dict)
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
175 repeats = import_text(annotation_file, ' ')
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
176 for repeat in repeats:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
177 repeat_name = repeat[9].translate(str.maketrans('()/', '___'))
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
178 try:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
179 repclass = repeat[10].split('/')[0]
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
180 repfamily = repeat[10].split('/')[1]
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
181 except IndexError:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
182 repclass, repfamily = repeat[10], repeat[10]
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
183 repeat_ref[repeat_name]['class'] = repclass
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
184 repeat_ref[repeat_name]['family'] = repfamily
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
185
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
186 # Populate classfractionalcounts and familyfractionalcounts
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
187 for key, value in fractionalcounts.items():
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
188 classfractionalcounts[repeat_ref[key]['class']] += value
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
189 familyfractionalcounts[repeat_ref[key]['family']] += value
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
190
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
191 # write class-, family- and repeat-level fractional counts to files
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
192 with open("class_fraction_counts.tsv", 'w') as fout:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
193 for key in sorted(classfractionalcounts):
6
388a47ca4199 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents: 5
diff changeset
194 fout.write(f"{key}\t{round(classfractionalcounts[key], 2)}\n")
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
195
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
196 with open("family_fraction_counts.tsv", 'w') as fout:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
197 for key in sorted(familyfractionalcounts):
6
388a47ca4199 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents: 5
diff changeset
198 fout.write(f"{key}\t{round(familyfractionalcounts[key], 2)}\n")
0
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
199
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
200 with open("fraction_counts.tsv", 'w') as fout:
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
201 for key in sorted(fractionalcounts):
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
202 fout.write(f"{key}\t{repeat_ref[key]['class']}\t"
4905a332a094 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff changeset
203 f"{repeat_ref[key]['family']}\t"
6
388a47ca4199 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents: 5
diff changeset
204 f"{round(fractionalcounts[key], 2)}\n")