Mercurial > repos > artbio > repenrich2
annotate RepEnrich2.py @ 6:388a47ca4199 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
author | artbio |
---|---|
date | Mon, 22 Apr 2024 10:31:50 +0000 |
parents | 08e50af788f7 |
children | 61e0404f0d76 |
rev | line source |
---|---|
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
1 import argparse |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
2 import csv |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
3 import os |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
4 import shlex |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
5 import subprocess |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
6 import sys |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
7 from collections import defaultdict |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
8 from concurrent.futures import ProcessPoolExecutor |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
9 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
10 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# Command-line interface.
# Help texts use adjacent string literals rather than backslash continuations
# inside a literal, so source indentation never ends up in the help strings.
parser = argparse.ArgumentParser(
    description='Repenrich aligns reads to Repeat Elements pseudogenomes '
                'and counts aligned reads. RepEnrich_setup must be run '
                'before its use')
# The three inputs below are consumed unconditionally by the script, so a
# missing one is reported by argparse instead of failing later on.
parser.add_argument('--annotation_file', action='store',
                    metavar='annotation_file', required=True,
                    help='RepeatMasker.org annotation file for your '
                         'organism. The file may be downloaded from '
                         'RepeatMasker.org. E.g. hg19_repeatmasker.txt')
parser.add_argument('--alignment_bam', action='store',
                    metavar='alignment_bam', required=True,
                    help='Bam alignments of unique mapper reads.')
parser.add_argument('--fastqfile', action='store', metavar='fastqfile',
                    required=True,
                    help='File of fastq reads mapping to multiple '
                         'locations. Example: /data/multimap.fastq')
# An empty string (the default) means single-end data.
parser.add_argument('--fastqfile2', action='store', dest='fastqfile2',
                    metavar='fastqfile2', default='',
                    help='fastqfile #2 when using paired-end option. '
                         'Default none')
parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus',
                    default=1, type=int,
                    help='Number of CPUs. The more cpus the '
                         'faster RepEnrich performs. Default: "1"')
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
33 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
args = parser.parse_args()

# parameters
cpus = args.cpus
annotation_file = args.annotation_file
unique_mapper_bam = args.alignment_bam
fastqfile_1, fastqfile_2 = args.fastqfile, args.fastqfile2
# Change if simple repeats are differently annotated in your organism
simple_repeat = "Simple_repeat"
# A non-empty --fastqfile2 turns on paired-end handling
paired_end = bool(args.fastqfile2)
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
48 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# check that the programs we need are available
# Only a failure to launch the executable (OSError) is treated as an error;
# the exit status of the probe commands is ignored.
try:
    for probe in ("coverageBed -h", "bowtie2 --version"):
        # subprocess.DEVNULL discards the output without leaving open
        # file handles on os.devnull behind.
        subprocess.call(shlex.split(probe),
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL)
except OSError:
    print("Error: Bowtie2 or bedtools not loaded")
    raise
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
60 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
61 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
def starts_with_numerical(list):
    """Tell whether the first item of a row parses as an integer.

    Args:
        list: a sequence of fields (e.g. one row produced by csv.reader).
            The name shadows the builtin but is kept so that existing
            keyword callers keep working.

    Returns:
        True if the sequence is non-empty and int() accepts its first item,
        False for an empty sequence or a first item int() rejects.
    """
    if not list:
        return False
    try:
        # Only the conversion itself can raise ValueError.
        int(list[0])
    except ValueError:
        return False
    return True
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
70 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
71 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# define a text importer for .out/.txt format of repbase
def import_text(filename, separator):
    """Read a delimited text file and keep the rows starting with an integer.

    Args:
        filename: path of the file to read.
        separator: one-character field delimiter handed to csv.reader.

    Returns:
        A list of rows (each a list of strings) for which
        starts_with_numerical() is true; all other rows are dropped.
    """
    # Raise the csv field size limit so very long fields do not abort parsing
    csv.field_size_limit(sys.maxsize)
    # The context manager closes the file once all rows have been read;
    # newline='' is how the csv module expects its input to be opened.
    with open(filename, newline='') as handle:
        reader = csv.reader(handle, delimiter=separator,
                            skipinitialspace=True)
        return [line for line in reader if starts_with_numerical(line)]
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
78 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
79 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
def run_bowtie(args):
    """Align one fastq file to one bowtie2 index and list the read names.

    Args:
        args: a (metagenome, fastqfile) pair; metagenome is passed to
            bowtie2 as the -x index and fastqfile as the reads file.

    Returns:
        A list whose first item is metagenome, followed by one read name per
        line of bowtie2 output. No output yields a single empty name.

    Raises:
        subprocess.CalledProcessError: if bowtie2 exits with a non-zero
            status.
    """
    metagenome, fastqfile = args
    # Build argv directly instead of shlex-splitting a formatted string, so
    # an index name or path containing spaces or quotes stays one argument.
    command = ["bowtie2", "-k", "1", "-p", "1", "--quiet", "--no-hd",
               "-x", metagenome, fastqfile]
    # NOTE(review): --no-unal is not passed, so bowtie2 presumably also
    # reports unaligned reads here - confirm this is intended.
    sam_output = subprocess.run(command, check=True,
                                capture_output=True, text=True).stdout
    sam_lines = sam_output.rstrip('\r\n').split('\n')
    # The read name is the first tab-separated field of each output line
    read_names = (line.split("\t")[0] for line in sam_lines)
    if paired_end:
        # Cut at the first "/" of the name only (e.g. a "/1" or "/2" mate
        # suffix), never at a "/" found later in the record.
        read_names = (name.split("/")[0] for name in read_names)
    return [metagenome, *read_names]
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
95 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
96 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# set a reference repeat list for the script
# Field 10 of each annotation row is used as the repeat name, with "(", ")"
# and "/" each replaced by "_". The translation table is built once.
name_table = str.maketrans('()/', '___')
repeat_list = sorted({listline[9].translate(name_table)
                      for listline in import_text(annotation_file, ' ')})
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
102 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# unique mapper counting
# "bedtools bamtobed | bedtools coverage" is run as two processes joined by a
# pipe, without a shell, so the BAM path is passed verbatim whatever
# characters it contains.
bamtobed = subprocess.Popen(
    ["bedtools", "bamtobed", "-i", unique_mapper_bam],
    stdout=subprocess.PIPE)
p = subprocess.Popen(
    ["bedtools", "coverage", "-b", "stdin", "-a", "repnames.bed"],
    stdin=bamtobed.stdout, stdout=subprocess.PIPE)
# Drop the parent's copy of the pipe so only the coverage process reads it
bamtobed.stdout.close()
coverage_output = p.communicate()[0].decode()
# Fail with the real cause instead of tripping over truncated output later
for proc in (bamtobed, p):
    if proc.wait() != 0:
        raise subprocess.CalledProcessError(proc.returncode, proc.args)
bedtools_counts = coverage_output.rstrip('\r\n').split('\n')

# parse bedtools output
counts = defaultdict(int)  # key: repeat names, value: unique mapper counts
sumofrepeatreads = 0
for line in bedtools_counts:
    if not line:
        # empty output splits into a single empty string
        continue
    fields = line.split('\t')
    # column 4 is used as the repeat name, column 5 as its read count
    counts[fields[3]] += int(fields[4])
    sumofrepeatreads += int(fields[4])
print(f"Identified {sumofrepeatreads} unique reads that mapped to repeats.")
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
117 |
5
08e50af788f7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit b1761de76fd068b86a06d88e70c1ba1d8644e7b5
artbio
parents:
0
diff
changeset
|
# print unique mapper counts
# One "<element>\t<count>" line per repeat, sorted by repeat name
with open("unique_mapper_counts.tsv", 'w') as fout:
    fout.write("#element\tcount\n")
    fout.writelines(f"{element}\t{counts[element]}\n"
                    for element in sorted(counts))
08e50af788f7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit b1761de76fd068b86a06d88e70c1ba1d8644e7b5
artbio
parents:
0
diff
changeset
|
123 |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# multimapper parsing
# One bowtie2 job per (repeat, fastq file) pair: every repeat against the
# first fastq file, then, for paired-end data, every repeat against the
# second one.
fastq_inputs = [fastqfile_1, fastqfile_2] if paired_end else [fastqfile_1]
args_list = [(metagenome, fastq) for fastq in fastq_inputs
             for metagenome in repeat_list]
# NOTE(review): the pool is created at module top level without an
# `if __name__ == "__main__":` guard, which presumably relies on the "fork"
# start method - confirm for the target platform and Python version.
with ProcessPoolExecutor(max_workers=cpus) as executor:
    results = executor.map(run_bowtie, args_list)
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
137 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# Aggregate results (avoiding race conditions)
# Each result is [repeat_name, read, read, ...]; reads of jobs that share a
# repeat name are concatenated under that name.
metagenome_reads = defaultdict(list)  # repeat_name: list of multimap reads
for repeat_name, *reads in results:
    metagenome_reads[repeat_name].extend(reads)
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
142 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
143 for name in metagenome_reads: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
144 # keep each read only once in the list |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
145 metagenome_reads[name] = list(set(metagenome_reads[name])) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
146 # remove "no read" instances |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
147 metagenome_reads[name] = [read for read in metagenome_reads[name] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
148 if read != ""] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
149 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
150 # build repeats_by_reads by inverting the metagenome_reads dictionary |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
151 repeats_by_reads = defaultdict(list) # readids: list of repeats names |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
152 for repname in metagenome_reads: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
153 for read in metagenome_reads[repname]: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
154 repeats_by_reads[read].append(repname) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
155 for repname in repeats_by_reads: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
156 repeats_by_reads[repname] = list(set(repeats_by_reads[repname])) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
157 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
158 # 3 dictionaries and 1 counter variable to be populated |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
159 fractionalcounts = defaultdict(float) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
160 familyfractionalcounts = defaultdict(float) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
161 classfractionalcounts = defaultdict(float) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
162 sumofrepeatreads = 0 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
163 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
164 # Update counts dictionary with sets of repeats (was "subfamilies") |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
165 # matched by multimappers |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
166 for repeat_set in repeats_by_reads.values(): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
167 repeat_set_string = ','.join(repeat_set) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
168 counts[repeat_set_string] += 1 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
169 sumofrepeatreads += 1 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
170 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
171 print(f'Identified more {sumofrepeatreads} mutimapper repeat reads') |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
172 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
173 # Populate fractionalcounts |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
174 for key, count in counts.items(): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
175 key_list = key.split(',') |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
176 for i in key_list: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
177 fractionalcounts[i] += count / len(key_list) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
178 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
179 # build repeat_ref for easy access to rep class and rep families |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
180 repeat_ref = defaultdict(dict) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
181 repeats = import_text(annotation_file, ' ') |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
182 for repeat in repeats: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
183 repeat_name = repeat[9].translate(str.maketrans('()/', '___')) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
184 try: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
185 repclass = repeat[10].split('/')[0] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
186 repfamily = repeat[10].split('/')[1] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
187 except IndexError: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
188 repclass, repfamily = repeat[10], repeat[10] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
189 repeat_ref[repeat_name]['class'] = repclass |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
190 repeat_ref[repeat_name]['family'] = repfamily |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
191 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
192 # Populate classfractionalcounts and familyfractionalcounts |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
193 for key, value in fractionalcounts.items(): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
194 classfractionalcounts[repeat_ref[key]['class']] += value |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
195 familyfractionalcounts[repeat_ref[key]['family']] += value |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
196 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
197 # print class-, family- and fraction-repeats counts to files |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
198 with open("class_fraction_counts.tsv", 'w') as fout: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
199 for key in sorted(classfractionalcounts): |
6
388a47ca4199
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents:
5
diff
changeset
|
200 fout.write(f"{key}\t{round(classfractionalcounts[key], 2)}\n") |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
201 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
202 with open("family_fraction_counts.tsv", 'w') as fout: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
203 for key in sorted(familyfractionalcounts): |
6
388a47ca4199
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents:
5
diff
changeset
|
204 fout.write(f"{key}\t{round(familyfractionalcounts[key], 2)}\n") |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
205 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
206 with open("fraction_counts.tsv", 'w') as fout: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
207 for key in sorted(fractionalcounts): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
208 fout.write(f"{key}\t{repeat_ref[key]['class']}\t" |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
209 f"{repeat_ref[key]['family']}\t" |
6
388a47ca4199
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents:
5
diff
changeset
|
210 f"{round(fractionalcounts[key], 2)}\n") |