# HG changeset patch # User artbio # Date 1714062154 0 # Node ID 2b61c6407efb05a9041ee3fa412a5e8324be1aae # Parent 567549a49eb24a8567344509badd4645f829382f planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a diff -r 567549a49eb2 -r 2b61c6407efb RepEnrich2.py --- a/RepEnrich2.py Thu Apr 25 09:06:48 2024 +0000 +++ b/RepEnrich2.py Thu Apr 25 16:22:34 2024 +0000 @@ -109,9 +109,13 @@ ''' write to files to save memory ''' - metagenome, fastqfile = args - b_opt = "-k 1 -p 1 --quiet --no-hd --no-unal" - command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile}") + metagenome = args + b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal" + if paired_end is True: + command = shlex.split(f"bowtie2 {b_opt} -x {metagenome}" + f" -1 {fastqfile_1} -2 {fastqfile_1}") + else: + command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile_1}") bowtie_align = subprocess.run(command, check=True, capture_output=True, text=True).stdout bowtie_align = bowtie_align.rstrip('\r\n').split('\n') @@ -123,17 +127,14 @@ # multimapper parsing -args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list] -if paired_end: - args_list.extend([(metagenome, fastqfile_2) for - metagenome in repeat_list]) +args_list = [metagenome for metagenome in repeat_list] with ProcessPoolExecutor(max_workers=cpus) as executor: results = executor.map(run_bowtie, args_list) # Aggregate results (avoiding race conditions) metagenome_reads = defaultdict(list) # metagenome: list of multimap reads -# Now we read .reads file to populate metagnomes_reads +# Now we read .reads files to populate metagnomes_reads for metagenome in repeat_list: with open(f"{metagenome}.reads") as readfile: for read in readfile: diff -r 567549a49eb2 -r 2b61c6407efb macros.xml --- a/macros.xml Thu Apr 25 09:06:48 2024 +0000 +++ b/macros.xml Thu Apr 25 16:22:34 2024 +0000 @@ -1,6 +1,6 @@ 2.31.1 - 8 + 9 23.0 diff -r 567549a49eb2 -r 2b61c6407efb test-data/chrY_paired_class_fraction_counts.tab --- a/test-data/chrY_paired_class_fraction_counts.tab Thu Apr 25 09:06:48 2024 +0000 +++ b/test-data/chrY_paired_class_fraction_counts.tab Thu Apr 25 16:22:34 2024 +0000 @@ -1,6 +1,6 @@ DNA 184.0 LINE 227.0 -LTR 27179.0 -Low_complexity 0.67 +LTR 27175.0 +Low_complexity 0.0 RC 0.0 -Simple_repeat 90.33 +Simple_repeat 91.0 diff -r 567549a49eb2 -r 2b61c6407efb test-data/chrY_paired_family_fraction_counts.tab --- a/test-data/chrY_paired_family_fraction_counts.tab Thu Apr 25 09:06:48 2024 +0000 +++ b/test-data/chrY_paired_family_fraction_counts.tab Thu Apr 25 16:22:34 2024 +0000 @@ -1,13 +1,13 @@ CMC-Transib 30.0 CR1 4.0 Copia 25880.0 -Gypsy 1238.0 +Gypsy 1234.0 Helitron 0.0 Jockey 107.0 LOA 0.0 -Low_complexity 0.67 +Low_complexity 0.0 P 60.0 Pao 61.0 R1 116.0 -Simple_repeat 90.33 +Simple_repeat 91.0 TcMar-Tc1 94.0 diff -r 567549a49eb2 -r 2b61c6407efb test-data/chrY_paired_fraction_counts.tab --- a/test-data/chrY_paired_fraction_counts.tab Thu Apr 25 09:06:48 2024 +0000 +++ b/test-data/chrY_paired_fraction_counts.tab Thu Apr 25 16:22:34 2024 +0000 @@ -8,8 +8,8 @@ BS2 LINE Jockey 58.0 BURDOCK_I-int LTR Gypsy 0.0 Baggins1 LINE LOA 0.0 -Bica_I-int LTR Gypsy 49.0 -Bica_LTR LTR Gypsy 1.0 +Bica_I-int LTR Gypsy 50.0 +Bica_LTR LTR Gypsy 0.0 CIRCE LTR Gypsy 0.0 Chouto_I-int LTR Gypsy 1.5 Copia1-I_DM LTR Copia 0.0 @@ -37,7 +37,7 @@ G5A_DM LINE Jockey 0.0 G5_DM LINE Jockey 0.0 G6_DM LINE Jockey 0.0 -GA-rich Low_complexity Low_complexity 0.67 +GA-rich Low_complexity Low_complexity 0.0 GTWIN_I-int LTR Gypsy 12.5 G_DM LINE Jockey 0.0 Gypsy11_I-int LTR Gypsy 0.0 @@ -48,7 +48,7 @@ Gypsy3_LTR LTR Gypsy 0.0 Gypsy4_I-int LTR Gypsy 0.0 Gypsy5_I-int LTR Gypsy 0.0 -Gypsy6A_LTR LTR Gypsy 1.0 +Gypsy6A_LTR LTR Gypsy 0.0 Gypsy6_I-int LTR Gypsy 31.0 Gypsy8_I-int LTR Gypsy 0.0 Gypsy8_LTR LTR Gypsy 0.0 @@ -73,29 +73,29 @@ MAX_LTR LTR Pao 2.0 MDG1_I-int LTR Gypsy 0.0 MDG1_LTR LTR Gypsy 0.0 -MDG3_I-int LTR Gypsy 156.5 -MDG3_LTR LTR Gypsy 2.5 +MDG3_I-int LTR Gypsy 156.0 +MDG3_LTR LTR Gypsy 3.0 MICROPIA_I-int LTR Gypsy 51.0 MICROPIA_LTR LTR Gypsy 2.0 Mariner2_DM DNA TcMar-Tc1 0.0 NINJA_I-int LTR Pao 0.0 NOMAD_I-int LTR Gypsy 0.0 -PROTOP_A DNA P 50.0 -PROTOP_B DNA P 10.0 +PROTOP_A DNA P 55.0 +PROTOP_B DNA P 5.0 QUASIMODO2-I_DM LTR Gypsy 43.0 QUASIMODO2-LTR_DM LTR Gypsy 0.0 -QUASIMODO_I-int LTR Gypsy 108.0 -QUASIMODO_LTR LTR Gypsy 23.0 +QUASIMODO_I-int LTR Gypsy 105.0 +QUASIMODO_LTR LTR Gypsy 25.0 R1_DM LINE R1 0.0 ROOA_I-int LTR Pao 0.0 ROOA_LTR LTR Pao 0.0 ROVER-I_DM LTR Gypsy 414.0 -ROVER-LTR_DM LTR Gypsy 6.0 +ROVER-LTR_DM LTR Gypsy 5.0 S2_DM DNA TcMar-Tc1 0.0 -STALKER4_I-int LTR Gypsy 143.5 +STALKER4_I-int LTR Gypsy 146.5 STALKER4_LTR LTR Gypsy 25.0 S_DM DNA TcMar-Tc1 53.0 -Stalker2_I-int LTR Gypsy 103.0 +Stalker2_I-int LTR Gypsy 99.0 Stalker2_LTR LTR Gypsy 3.0 TART-A LINE Jockey 4.0 TART_B1 LINE Jockey 21.0 @@ -109,7 +109,7 @@ _AAT_n Simple_repeat Simple_repeat 0.0 _ACAATAG_n Simple_repeat Simple_repeat 0.0 _ACC_n Simple_repeat Simple_repeat 0.0 -_AGAGAAG_n Simple_repeat Simple_repeat 2.17 +_AGAGAAG_n Simple_repeat Simple_repeat 2.5 _AGAGA_n Simple_repeat Simple_repeat 43.0 _ATAAT_n Simple_repeat Simple_repeat 0.0 _ATATATT_n Simple_repeat Simple_repeat 0.0 @@ -120,7 +120,7 @@ _A_n Simple_repeat Simple_repeat 0.0 _CATA_n Simple_repeat Simple_repeat 0.0 _CTTTT_n Simple_repeat Simple_repeat 0.0 -_GAGAA_n Simple_repeat Simple_repeat 45.17 +_GAGAA_n Simple_repeat Simple_repeat 45.5 _GCCTTT_n Simple_repeat Simple_repeat 0.0 _TAATAT_n Simple_repeat Simple_repeat 0.0 _TAATA_n Simple_repeat Simple_repeat 0.0