comparison RepEnrich2.py @ 9:2b61c6407efb draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
author artbio
date Thu, 25 Apr 2024 16:22:34 +0000
parents 567549a49eb2
children
comparison
equal deleted inserted replaced
8:567549a49eb2 9:2b61c6407efb
107 107
108 def run_bowtie(args): 108 def run_bowtie(args):
109 ''' 109 '''
110 write to files to save memory 110 write to files to save memory
111 ''' 111 '''
112 metagenome, fastqfile = args 112 metagenome = args
113 b_opt = "-k 1 -p 1 --quiet --no-hd --no-unal" 113 b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal"
114 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile}") 114 if paired_end is True:
115 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome}"
116 f" -1 {fastqfile_1} -2 {fastqfile_1}")
117 else:
118 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile_1}")
115 bowtie_align = subprocess.run(command, check=True, 119 bowtie_align = subprocess.run(command, check=True,
116 capture_output=True, text=True).stdout 120 capture_output=True, text=True).stdout
117 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') 121 bowtie_align = bowtie_align.rstrip('\r\n').split('\n')
118 with open(f"{metagenome}.reads", "a+") as readfile: 122 with open(f"{metagenome}.reads", "a+") as readfile:
119 for line in bowtie_align: 123 for line in bowtie_align:
121 if read: 125 if read:
122 readfile.write(f"{read}\n") 126 readfile.write(f"{read}\n")
123 127
124 128
125 # multimapper parsing 129 # multimapper parsing
126 args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list] 130 args_list = [metagenome for metagenome in repeat_list]
127 if paired_end:
128 args_list.extend([(metagenome, fastqfile_2) for
129 metagenome in repeat_list])
130 with ProcessPoolExecutor(max_workers=cpus) as executor: 131 with ProcessPoolExecutor(max_workers=cpus) as executor:
131 results = executor.map(run_bowtie, args_list) 132 results = executor.map(run_bowtie, args_list)
132 133
133 # Aggregate results (avoiding race conditions) 134 # Aggregate results (avoiding race conditions)
134 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads 135 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads
135 136
136 # Now we read .reads file to populate metagenome_reads 137 # Now we read .reads files to populate metagenome_reads
137 for metagenome in repeat_list: 138 for metagenome in repeat_list:
138 with open(f"{metagenome}.reads") as readfile: 139 with open(f"{metagenome}.reads") as readfile:
139 for read in readfile: 140 for read in readfile:
140 metagenome_reads[metagenome].append(read) 141 metagenome_reads[metagenome].append(read)
141 # each read appears only once in the list 142 # each read appears only once in the list