Mercurial > repos > artbio > repenrich2
comparison RepEnrich2.py @ 9:2b61c6407efb draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
| author | artbio |
|---|---|
| date | Thu, 25 Apr 2024 16:22:34 +0000 |
| parents | 567549a49eb2 |
| children | |
comparison
equal
deleted
inserted
replaced
| 8:567549a49eb2 | 9:2b61c6407efb |
|---|---|
| 107 | 107 |
| 108 def run_bowtie(args): | 108 def run_bowtie(args): |
| 109 ''' | 109 ''' |
| 110 write to files to save memory | 110 write to files to save memory |
| 111 ''' | 111 ''' |
| 112 metagenome, fastqfile = args | 112 metagenome = args |
| 113 b_opt = "-k 1 -p 1 --quiet --no-hd --no-unal" | 113 b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal" |
| 114 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile}") | 114 if paired_end is True: |
| | 115 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome}" |
| | 116 f" -1 {fastqfile_1} -2 {fastqfile_1}") |
| | 117 else: |
| | 118 command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile_1}") |
| 115 bowtie_align = subprocess.run(command, check=True, | 119 bowtie_align = subprocess.run(command, check=True, |
| 116 capture_output=True, text=True).stdout | 120 capture_output=True, text=True).stdout |
| 117 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') | 121 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') |
| 118 with open(f"{metagenome}.reads", "a+") as readfile: | 122 with open(f"{metagenome}.reads", "a+") as readfile: |
| 119 for line in bowtie_align: | 123 for line in bowtie_align: |
| 121 if read: | 125 if read: |
| 122 readfile.write(f"{read}\n") | 126 readfile.write(f"{read}\n") |
| 123 | 127 |
| 124 | 128 |
| 125 # multimapper parsing | 129 # multimapper parsing |
| 126 args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list] | 130 args_list = [metagenome for metagenome in repeat_list] |
| 127 if paired_end: | |
| 128 args_list.extend([(metagenome, fastqfile_2) for | |
| 129 metagenome in repeat_list]) | |
| 130 with ProcessPoolExecutor(max_workers=cpus) as executor: | 131 with ProcessPoolExecutor(max_workers=cpus) as executor: |
| 131 results = executor.map(run_bowtie, args_list) | 132 results = executor.map(run_bowtie, args_list) |
| 132 | 133 |
| 133 # Aggregate results (avoiding race conditions) | 134 # Aggregate results (avoiding race conditions) |
| 134 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads | 135 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads |
| 135 | 136 |
| 136 # Now we read .reads file to populate metagnomes_reads | 137 # Now we read .reads files to populate metagnomes_reads |
| 137 for metagenome in repeat_list: | 138 for metagenome in repeat_list: |
| 138 with open(f"{metagenome}.reads") as readfile: | 139 with open(f"{metagenome}.reads") as readfile: |
| 139 for read in readfile: | 140 for read in readfile: |
| 140 metagenome_reads[metagenome].append(read) | 141 metagenome_reads[metagenome].append(read) |
| 141 # read are only once in list | 142 # read are only once in list |
