Mercurial > repos > artbio > repenrich2
comparison RepEnrich2.py @ 9:2b61c6407efb draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
author | artbio |
---|---|
date | Thu, 25 Apr 2024 16:22:34 +0000 |
parents | 567549a49eb2 |
children |
comparison
equal
deleted
inserted
replaced
8:567549a49eb2 | 9:2b61c6407efb |
---|---|
107 | 107 |
108 def run_bowtie(args): | 108 def run_bowtie(args): |
109 ''' | 109 ''' |
110 write to files to save memory | 110 write to files to save memory |
111 ''' | 111 ''' |
112 metagenome, fastqfile = args | 112 metagenome = args |
113 b_opt = "-k 1 -p 1 --quiet --no-hd --no-unal" | 113 b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal" |
114 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile}") | 114 if paired_end is True: |
115 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome}" | |
116 f" -1 {fastqfile_1} -2 {fastqfile_1}") | |
117 else: | |
118 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile_1}") | |
115 bowtie_align = subprocess.run(command, check=True, | 119 bowtie_align = subprocess.run(command, check=True, |
116 capture_output=True, text=True).stdout | 120 capture_output=True, text=True).stdout |
117 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') | 121 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') |
118 with open(f"{metagenome}.reads", "a+") as readfile: | 122 with open(f"{metagenome}.reads", "a+") as readfile: |
119 for line in bowtie_align: | 123 for line in bowtie_align: |
121 if read: | 125 if read: |
122 readfile.write(f"{read}\n") | 126 readfile.write(f"{read}\n") |
123 | 127 |
124 | 128 |
125 # multimapper parsing | 129 # multimapper parsing |
126 args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list] | 130 args_list = [metagenome for metagenome in repeat_list] |
127 if paired_end: | |
128 args_list.extend([(metagenome, fastqfile_2) for | |
129 metagenome in repeat_list]) | |
130 with ProcessPoolExecutor(max_workers=cpus) as executor: | 131 with ProcessPoolExecutor(max_workers=cpus) as executor: |
131 results = executor.map(run_bowtie, args_list) | 132 results = executor.map(run_bowtie, args_list) |
132 | 133 |
133 # Aggregate results (avoiding race conditions) | 134 # Aggregate results (avoiding race conditions) |
134 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads | 135 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads |
135 | 136 |
136 # Now we read .reads file to populate metagenome_reads | 137 # Now we read .reads files to populate metagenome_reads |
137 for metagenome in repeat_list: | 138 for metagenome in repeat_list: |
138 with open(f"{metagenome}.reads") as readfile: | 139 with open(f"{metagenome}.reads") as readfile: |
139 for read in readfile: | 140 for read in readfile: |
140 metagenome_reads[metagenome].append(read) | 141 metagenome_reads[metagenome].append(read) |
141 # reads appear only once in the list | 142 # reads appear only once in the list |