changeset 9:2b61c6407efb draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
author artbio
date Thu, 25 Apr 2024 16:22:34 +0000
parents 567549a49eb2
children
files RepEnrich2.py macros.xml test-data/chrY_paired_class_fraction_counts.tab test-data/chrY_paired_family_fraction_counts.tab test-data/chrY_paired_fraction_counts.tab
diffstat 5 files changed, 31 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/RepEnrich2.py	Thu Apr 25 09:06:48 2024 +0000
+++ b/RepEnrich2.py	Thu Apr 25 16:22:34 2024 +0000
@@ -109,9 +109,13 @@
     '''
     write to files to save memory
     '''
-    metagenome, fastqfile = args
-    b_opt = "-k 1 -p 1 --quiet --no-hd --no-unal"
-    command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile}")
+    metagenome = args
+    b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal"
+    if paired_end is True:
+        command = shlex.split(f"bowtie2 {b_opt} -x {metagenome}"
+                              f" -1 {fastqfile_1} -2 {fastqfile_1}")
+    else:
+        command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile_1}")
     bowtie_align = subprocess.run(command, check=True,
                                   capture_output=True, text=True).stdout
     bowtie_align = bowtie_align.rstrip('\r\n').split('\n')
@@ -123,17 +127,14 @@
 
 
 # multimapper parsing
-args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list]
-if paired_end:
-    args_list.extend([(metagenome, fastqfile_2) for
-                     metagenome in repeat_list])
+args_list = [metagenome for metagenome in repeat_list]
 with ProcessPoolExecutor(max_workers=cpus) as executor:
     results = executor.map(run_bowtie, args_list)
 
 # Aggregate results (avoiding race conditions)
 metagenome_reads = defaultdict(list)  # metagenome: list of multimap reads
 
-# Now we read .reads file to populate metagnomes_reads
+# Now we read .reads files to populate metagenome_reads
 for metagenome in repeat_list:
     with open(f"{metagenome}.reads") as readfile:
         for read in readfile:
--- a/macros.xml	Thu Apr 25 09:06:48 2024 +0000
+++ b/macros.xml	Thu Apr 25 16:22:34 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">2.31.1</token>
-    <token name="@VERSION_SUFFIX@">8</token>
+    <token name="@VERSION_SUFFIX@">9</token>
     <token name="@PROFILE@">23.0</token>
 
     <xml name="repenrich_requirements">
--- a/test-data/chrY_paired_class_fraction_counts.tab	Thu Apr 25 09:06:48 2024 +0000
+++ b/test-data/chrY_paired_class_fraction_counts.tab	Thu Apr 25 16:22:34 2024 +0000
@@ -1,6 +1,6 @@
 DNA	184.0
 LINE	227.0
-LTR	27179.0
-Low_complexity	0.67
+LTR	27175.0
+Low_complexity	0.0
 RC	0.0
-Simple_repeat	90.33
+Simple_repeat	91.0
--- a/test-data/chrY_paired_family_fraction_counts.tab	Thu Apr 25 09:06:48 2024 +0000
+++ b/test-data/chrY_paired_family_fraction_counts.tab	Thu Apr 25 16:22:34 2024 +0000
@@ -1,13 +1,13 @@
 CMC-Transib	30.0
 CR1	4.0
 Copia	25880.0
-Gypsy	1238.0
+Gypsy	1234.0
 Helitron	0.0
 Jockey	107.0
 LOA	0.0
-Low_complexity	0.67
+Low_complexity	0.0
 P	60.0
 Pao	61.0
 R1	116.0
-Simple_repeat	90.33
+Simple_repeat	91.0
 TcMar-Tc1	94.0
--- a/test-data/chrY_paired_fraction_counts.tab	Thu Apr 25 09:06:48 2024 +0000
+++ b/test-data/chrY_paired_fraction_counts.tab	Thu Apr 25 16:22:34 2024 +0000
@@ -8,8 +8,8 @@
 BS2	LINE	Jockey	58.0
 BURDOCK_I-int	LTR	Gypsy	0.0
 Baggins1	LINE	LOA	0.0
-Bica_I-int	LTR	Gypsy	49.0
-Bica_LTR	LTR	Gypsy	1.0
+Bica_I-int	LTR	Gypsy	50.0
+Bica_LTR	LTR	Gypsy	0.0
 CIRCE	LTR	Gypsy	0.0
 Chouto_I-int	LTR	Gypsy	1.5
 Copia1-I_DM	LTR	Copia	0.0
@@ -37,7 +37,7 @@
 G5A_DM	LINE	Jockey	0.0
 G5_DM	LINE	Jockey	0.0
 G6_DM	LINE	Jockey	0.0
-GA-rich	Low_complexity	Low_complexity	0.67
+GA-rich	Low_complexity	Low_complexity	0.0
 GTWIN_I-int	LTR	Gypsy	12.5
 G_DM	LINE	Jockey	0.0
 Gypsy11_I-int	LTR	Gypsy	0.0
@@ -48,7 +48,7 @@
 Gypsy3_LTR	LTR	Gypsy	0.0
 Gypsy4_I-int	LTR	Gypsy	0.0
 Gypsy5_I-int	LTR	Gypsy	0.0
-Gypsy6A_LTR	LTR	Gypsy	1.0
+Gypsy6A_LTR	LTR	Gypsy	0.0
 Gypsy6_I-int	LTR	Gypsy	31.0
 Gypsy8_I-int	LTR	Gypsy	0.0
 Gypsy8_LTR	LTR	Gypsy	0.0
@@ -73,29 +73,29 @@
 MAX_LTR	LTR	Pao	2.0
 MDG1_I-int	LTR	Gypsy	0.0
 MDG1_LTR	LTR	Gypsy	0.0
-MDG3_I-int	LTR	Gypsy	156.5
-MDG3_LTR	LTR	Gypsy	2.5
+MDG3_I-int	LTR	Gypsy	156.0
+MDG3_LTR	LTR	Gypsy	3.0
 MICROPIA_I-int	LTR	Gypsy	51.0
 MICROPIA_LTR	LTR	Gypsy	2.0
 Mariner2_DM	DNA	TcMar-Tc1	0.0
 NINJA_I-int	LTR	Pao	0.0
 NOMAD_I-int	LTR	Gypsy	0.0
-PROTOP_A	DNA	P	50.0
-PROTOP_B	DNA	P	10.0
+PROTOP_A	DNA	P	55.0
+PROTOP_B	DNA	P	5.0
 QUASIMODO2-I_DM	LTR	Gypsy	43.0
 QUASIMODO2-LTR_DM	LTR	Gypsy	0.0
-QUASIMODO_I-int	LTR	Gypsy	108.0
-QUASIMODO_LTR	LTR	Gypsy	23.0
+QUASIMODO_I-int	LTR	Gypsy	105.0
+QUASIMODO_LTR	LTR	Gypsy	25.0
 R1_DM	LINE	R1	0.0
 ROOA_I-int	LTR	Pao	0.0
 ROOA_LTR	LTR	Pao	0.0
 ROVER-I_DM	LTR	Gypsy	414.0
-ROVER-LTR_DM	LTR	Gypsy	6.0
+ROVER-LTR_DM	LTR	Gypsy	5.0
 S2_DM	DNA	TcMar-Tc1	0.0
-STALKER4_I-int	LTR	Gypsy	143.5
+STALKER4_I-int	LTR	Gypsy	146.5
 STALKER4_LTR	LTR	Gypsy	25.0
 S_DM	DNA	TcMar-Tc1	53.0
-Stalker2_I-int	LTR	Gypsy	103.0
+Stalker2_I-int	LTR	Gypsy	99.0
 Stalker2_LTR	LTR	Gypsy	3.0
 TART-A	LINE	Jockey	4.0
 TART_B1	LINE	Jockey	21.0
@@ -109,7 +109,7 @@
 _AAT_n	Simple_repeat	Simple_repeat	0.0
 _ACAATAG_n	Simple_repeat	Simple_repeat	0.0
 _ACC_n	Simple_repeat	Simple_repeat	0.0
-_AGAGAAG_n	Simple_repeat	Simple_repeat	2.17
+_AGAGAAG_n	Simple_repeat	Simple_repeat	2.5
 _AGAGA_n	Simple_repeat	Simple_repeat	43.0
 _ATAAT_n	Simple_repeat	Simple_repeat	0.0
 _ATATATT_n	Simple_repeat	Simple_repeat	0.0
@@ -120,7 +120,7 @@
 _A_n	Simple_repeat	Simple_repeat	0.0
 _CATA_n	Simple_repeat	Simple_repeat	0.0
 _CTTTT_n	Simple_repeat	Simple_repeat	0.0
-_GAGAA_n	Simple_repeat	Simple_repeat	45.17
+_GAGAA_n	Simple_repeat	Simple_repeat	45.5
 _GCCTTT_n	Simple_repeat	Simple_repeat	0.0
 _TAATAT_n	Simple_repeat	Simple_repeat	0.0
 _TAATA_n	Simple_repeat	Simple_repeat	0.0