changeset 20:ea8146ee148f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/busco/ commit 9bf18dbcef34a48d3c40150ee953706bececfdd1
author iuc
date Mon, 18 Mar 2024 16:01:49 +0000 (9 months ago)
parents 2a5b8b9936bf
children 2babe6d5c561
files busco.xml test-data/genome_results/short_summary test-data/genome_results_metaeuk/short_summary test-data/genome_results_metaeuk_auto/out.gff test-data/proteome_results/short_summary test-data/transcriptome_results/short_summary
diffstat 6 files changed, 75 insertions(+), 71 deletions(-) [+]
line wrap: on
line diff
--- a/busco.xml	Tue Nov 14 13:03:37 2023 +0000
+++ b/busco.xml	Mon Mar 18 16:01:49 2024 +0000
@@ -209,7 +209,7 @@
                 </conditional>
             </conditional>
             <param name="outputs" value="short_summary,missing,gff" />
-            <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="6" />
+            <output name="busco_sum" file="genome_results/short_summary" compare="re_match_multiline" />
             <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" />
             <output name="busco_gff">
                 <assert_contents>
@@ -233,7 +233,7 @@
                 <param name="mode" value="prot" />
             </conditional>
             <param name="outputs" value="short_summary,missing,image,gff" />
-            <output name="busco_sum" file="proteome_results/short_summary" compare="diff" lines_diff="6" />
+            <output name="busco_sum" file="proteome_results/short_summary" compare="re_match_multiline" />
             <output name="busco_table" file="proteome_results/full_table" compare="diff" lines_diff="4" />
             <output name="busco_missing" file="proteome_results/missing_buscos_list" compare="diff" lines_diff="4" />
             <output name="summary_image" file="proteome_results/summary.png" compare="sim_size" />
@@ -254,7 +254,7 @@
                 <param name="mode" value="tran" />
             </conditional>
             <param name="outputs" value="short_summary,missing,image,gff" />
-            <output name="busco_sum" file="transcriptome_results/short_summary" compare="diff" lines_diff="6" />
+            <output name="busco_sum" file="transcriptome_results/short_summary" compare="re_match_multiline" />
             <output name="busco_table" file="transcriptome_results/full_table" compare="diff" lines_diff="6" />
             <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="6" />
             <output name="summary_image" file="transcriptome_results/summary.png" compare="sim_size" />
@@ -278,7 +278,7 @@
                 </conditional>
             </conditional>
             <param name="outputs" value="short_summary,gff" />
-            <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="6" />
+            <output name="busco_sum" file="genome_results/short_summary" compare="re_match_multiline"/>
             <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" />
             <output name="busco_gff">
                 <assert_contents>
@@ -306,7 +306,7 @@
                 </conditional>
             </conditional>
             <param name="outputs" value="short_summary,missing" />
-            <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="6" />
+            <output name="busco_sum" file="genome_results/short_summary" compare="re_match_multiline" />
             <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" />
             <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4" />
         </test>
@@ -323,11 +323,15 @@
                 </conditional>
             </conditional>
             <param name="outputs" value="short_summary,missing,image,gff" />
-            <output name="busco_sum" file="genome_results_metaeuk/short_summary" compare="diff" lines_diff="6" />
+            <output name="busco_sum" file="genome_results_metaeuk/short_summary" compare="re_match_multiline" />
             <output name="busco_table" file="genome_results_metaeuk/full_table" compare="diff" lines_diff="6" />
             <output name="busco_missing" file="genome_results_metaeuk/missing_buscos_list" compare="diff" lines_diff="6" />
             <output name="summary_image" file="genome_results_metaeuk/summary.png" compare="sim_size" />
-            <output name="busco_gff" file="genome_results_metaeuk/out.gff" compare="diff" />
+            <output name="busco_gff" ftype="gff3">
+                <assert_contents>
+                    <has_n_lines n="13"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="5">
             <param name="input" value="genome.fa" />
@@ -343,7 +347,7 @@
             </conditional>
             <param name="outputs" value="short_summary,missing,image,gff" />
             <output name="busco_sum" file="genome_results_metaeuk_auto/short_summary" compare="diff" lines_diff="6" />
-            <output name="busco_table" file="genome_results_metaeuk_auto/full_table" compare="diff" lines_diff="2" />
+            <output name="busco_table" file="genome_results_metaeuk_auto/full_table" compare="diff" lines_diff="6" />
             <output name="busco_missing" file="genome_results_metaeuk_auto/missing_buscos_list" compare="diff" lines_diff="2" />
             <output name="summary_image" file="genome_results_metaeuk_auto/summary.png" compare="sim_size" />
             <output name="busco_gff" file="genome_results_metaeuk_auto/out.gff" compare="diff" />
--- a/test-data/genome_results/short_summary	Tue Nov 14 13:03:37 2023 +0000
+++ b/test-data/genome_results/short_summary	Mon Mar 18 16:01:49 2024 +0000
@@ -1,35 +1,35 @@
-# BUSCO version is: 5.5.0 
-# The lineage dataset is: arthropoda_odb10 (Creation date: 2020-09-10, number of genomes: 90, number of BUSCOs: 1013)
-# Summarized benchmarking in BUSCO notation for file /tmp/tmpbmq1q2c6/files/8/e/b/dataset_8eb218b1-f367-4315-9915-554c009808cb.dat
+# BUSCO version is: 5\.5\.0 
+# The lineage dataset is: arthropoda_odb10 \(Creation date: [0-9]{4}-[0-9]{2}-[0-9]{2}, number of genomes: 90, number of BUSCOs: 1013\)
+# Summarized benchmarking in BUSCO notation for file [a-z0-9_\-/\.]+
 # BUSCO was run in mode: euk_genome_aug
 # Gene predictor used: augustus
 
-	***** Results: *****
+	\*\*\*\*\* Results: \*\*\*\*\*
 
-	C:0.1%[S:0.1%,D:0.0%],F:0.0%,M:99.9%,n:1013	   
-	1	Complete BUSCOs (C)			   
-	1	Complete and single-copy BUSCOs (S)	   
-	0	Complete and duplicated BUSCOs (D)	   
-	0	Fragmented BUSCOs (F)			   
-	1012	Missing BUSCOs (M)			   
+	C:0\.1%\[S:0\.1%,D:0\.0%\],F:0\.0%,M:99\.9%,n:1013	   
+	1	Complete BUSCOs \(C\)			   
+	1	Complete and single-copy BUSCOs \(S\)	   
+	0	Complete and duplicated BUSCOs \(D\)	   
+	0	Fragmented BUSCOs \(F\)			   
+	1012	Missing BUSCOs \(M\)			   
 	1013	Total BUSCO groups searched		   
 
 Assembly Statistics:
 	1	Number of scaffolds
 	1	Number of contigs
 	62370	Total length
-	0.000%	Percent gaps
+	0\.000%	Percent gaps
 	62 KB	Scaffold N50
 	62 KB	Contigs N50
 
 
 Dependencies and versions:
-	hmmsearch: 3.1
-	bbtools: 39.01
-	makeblastdb: 2.14.0+
-	tblastn: 2.14.0+
-	augustus: 3.5.0
-	gff2gbSmallDNA.pl: None
-	new_species.pl: None
+	hmmsearch: [0-9\.\+]+
+	bbtools: [0-9\.\+]+
+	makeblastdb: [0-9\.\+]+
+	tblastn: [0-9\.\+]+
+	augustus: [0-9\.\+]+
+	gff2gbSmallDNA\.pl: None
+	new_species\.pl: None
 	etraining: None
-	busco: 5.5.0
+	busco: [0-9\.\+]+
--- a/test-data/genome_results_metaeuk/short_summary	Tue Nov 14 13:03:37 2023 +0000
+++ b/test-data/genome_results_metaeuk/short_summary	Mon Mar 18 16:01:49 2024 +0000
@@ -1,30 +1,30 @@
-# BUSCO version is: 5.5.0 
-# The lineage dataset is: arthropoda_odb10 (Creation date: 2020-09-10, number of genomes: 90, number of BUSCOs: 1013)
-# Summarized benchmarking in BUSCO notation for file /tmp/tmp5_syjrgy/files/1/d/2/dataset_1d20a949-adce-4511-8cab-345d59a61a05.dat
+# BUSCO version is: 5\.5\.0 
+# The lineage dataset is: arthropoda_odb10 \(Creation date: [0-9]{4}-[0-9]{2}-[0-9]{2}, number of genomes: 90, number of BUSCOs: 1013\)
+# Summarized benchmarking in BUSCO notation for file [a-z0-9_\-/\.]+
 # BUSCO was run in mode: euk_genome_met
 # Gene predictor used: metaeuk
 
-	***** Results: *****
+	\*\*\*\*\* Results: \*\*\*\*\*
 
-	C:0.2%[S:0.2%,D:0.0%],F:0.0%,M:99.8%,n:1013	   
-	2	Complete BUSCOs (C)			   
-	2	Complete and single-copy BUSCOs (S)	   
-	0	Complete and duplicated BUSCOs (D)	   
-	0	Fragmented BUSCOs (F)			   
-	1011	Missing BUSCOs (M)			   
+	C:0\.2%\[S:0\.2%,D:0\.0%\],F:0\.0%,M:99\.8%,n:1013	   
+	2	Complete BUSCOs \(C\)			   
+	2	Complete and single-copy BUSCOs \(S\)	   
+	0	Complete and duplicated BUSCOs \(D\)	   
+	0	Fragmented BUSCOs \(F\)			   
+	1011	Missing BUSCOs \(M\)			   
 	1013	Total BUSCO groups searched		   
 
 Assembly Statistics:
 	1	Number of scaffolds
 	1	Number of contigs
 	62370	Total length
-	0.000%	Percent gaps
+	0\.000%	Percent gaps
 	62 KB	Scaffold N50
 	62 KB	Contigs N50
 
 
 Dependencies and versions:
-	hmmsearch: 3.1
-	bbtools: 39.01
-	metaeuk: 6.a5d39d9
-	busco: 5.5.0
+	hmmsearch: [0-9\.\+]+
+	bbtools: [0-9\.\+]+
+	metaeuk: [0-9a-z\.\+]+
+	busco: [0-9\.\+]+
--- a/test-data/genome_results_metaeuk_auto/out.gff	Tue Nov 14 13:03:37 2023 +0000
+++ b/test-data/genome_results_metaeuk_auto/out.gff	Mon Mar 18 16:01:49 2024 +0000
@@ -1,5 +1,5 @@
 ##gff-version 3
-sample	MetaEuk	gene	34846	35679	545	-	.	Target_ID=1053181at2759_7227_0:001f92;TCS_ID=1053181at2759_7227_0:001f92|sample|-|34845
-sample	MetaEuk	mRNA	34846	35679	545	-	.	Target_ID=1053181at2759_7227_0:001f92;TCS_ID=1053181at2759_7227_0:001f92|sample|-|34845_mRNA;Parent=1053181at2759_7227_0:001f92|sample|-|34845
-sample	MetaEuk	exon	34846	35679	545	-	.	Target_ID=1053181at2759_7227_0:001f92;TCS_ID=1053181at2759_7227_0:001f92|sample|-|34845_exon_0;Parent=1053181at2759_7227_0:001f92|sample|-|34845_mRNA
-sample	MetaEuk	CDS	34846	35679	545	-	.	Target_ID=1053181at2759_7227_0:001f92;TCS_ID=1053181at2759_7227_0:001f92|sample|-|34845_CDS_0;Parent=1053181at2759_7227_0:001f92|sample|-|34845_exon_0
+sample	MetaEuk	gene	34846	35694	527	-	.	Target_ID=1053181at2759_7245_0:00200b;TCS_ID=1053181at2759_7245_0:00200b|sample|-|34845
+sample	MetaEuk	mRNA	34846	35694	527	-	.	Target_ID=1053181at2759_7245_0:00200b;TCS_ID=1053181at2759_7245_0:00200b|sample|-|34845_mRNA;Parent=1053181at2759_7245_0:00200b|sample|-|34845
+sample	MetaEuk	exon	34846	35694	527	-	.	Target_ID=1053181at2759_7245_0:00200b;TCS_ID=1053181at2759_7245_0:00200b|sample|-|34845_exon_0;Parent=1053181at2759_7245_0:00200b|sample|-|34845_mRNA
+sample	MetaEuk	CDS	34846	35694	527	-	.	Target_ID=1053181at2759_7245_0:00200b;TCS_ID=1053181at2759_7245_0:00200b|sample|-|34845_CDS_0;Parent=1053181at2759_7245_0:00200b|sample|-|34845_exon_0
--- a/test-data/proteome_results/short_summary	Tue Nov 14 13:03:37 2023 +0000
+++ b/test-data/proteome_results/short_summary	Mon Mar 18 16:01:49 2024 +0000
@@ -1,18 +1,18 @@
-# BUSCO version is: 5.5.0 
-# The lineage dataset is: arthropoda_odb10 (Creation date: 2020-09-10, number of genomes: 90, number of BUSCOs: 1013)
-# Summarized benchmarking in BUSCO notation for file /tmp/tmp5_syjrgy/files/b/a/7/dataset_ba764d57-9365-4d54-a1f8-ac1350df0747.dat
+# BUSCO version is: 5\.5\.0 
+# The lineage dataset is: arthropoda_odb10 \(Creation date: [0-9]{4}-[0-9]{2}-[0-9]{2}, number of genomes: 90, number of BUSCOs: 1013\)
+# Summarized benchmarking in BUSCO notation for file [a-z0-9_\-/\.]+
 # BUSCO was run in mode: proteins
 
-	***** Results: *****
+	\*\*\*\*\* Results: \*\*\*\*\*
 
-	C:0.1%[S:0.1%,D:0.0%],F:0.0%,M:99.9%,n:1013	   
-	1	Complete BUSCOs (C)			   
-	1	Complete and single-copy BUSCOs (S)	   
-	0	Complete and duplicated BUSCOs (D)	   
-	0	Fragmented BUSCOs (F)			   
-	1012	Missing BUSCOs (M)			   
+	C:0\.1%\[S:0\.1%,D:0\.0%\],F:0\.0%,M:99\.9%,n:1013	   
+	1	Complete BUSCOs \(C\)			   
+	1	Complete and single-copy BUSCOs \(S\)	   
+	0	Complete and duplicated BUSCOs \(D\)	   
+	0	Fragmented BUSCOs \(F\)			   
+	1012	Missing BUSCOs \(M\)			   
 	1013	Total BUSCO groups searched		   
 
 Dependencies and versions:
-	hmmsearch: 3.1
-	busco: 5.5.0
+	hmmsearch: [0-9\.\+]+
+	busco: [0-9\.\+]+
--- a/test-data/transcriptome_results/short_summary	Tue Nov 14 13:03:37 2023 +0000
+++ b/test-data/transcriptome_results/short_summary	Mon Mar 18 16:01:49 2024 +0000
@@ -1,19 +1,19 @@
-# BUSCO version is: 5.5.0 
-# The lineage dataset is: arthropoda_odb10 (Creation date: 2020-09-10, number of genomes: 90, number of BUSCOs: 1013)
-# Summarized benchmarking in BUSCO notation for file /tmp/tmp5_syjrgy/files/4/3/b/dataset_43bb7c5f-29dc-4638-a19d-abb98c145794.dat
+# BUSCO version is: 5\.5\.0 
+# The lineage dataset is: arthropoda_odb10 \(Creation date: [0-9]{4}-[0-9]{2}-[0-9]{2}, number of genomes: 90, number of BUSCOs: 1013\)
+# Summarized benchmarking in BUSCO notation for file [a-z0-9_\-/\.]+
 # BUSCO was run in mode: euk_tran
 
-	***** Results: *****
+	\*\*\*\*\* Results: \*\*\*\*\*
 
-	C:0.1%[S:0.1%,D:0.0%],F:0.0%,M:99.9%,n:1013	   
-	1	Complete BUSCOs (C)			   
-	1	Complete and single-copy BUSCOs (S)	   
-	0	Complete and duplicated BUSCOs (D)	   
-	0	Fragmented BUSCOs (F)			   
-	1012	Missing BUSCOs (M)			   
+	C:0\.1%\[S:0\.1%,D:0\.0%\],F:0\.0%,M:99\.9%,n:1013	   
+	1	Complete BUSCOs \(C\)			   
+	1	Complete and single-copy BUSCOs \(S\)	   
+	0	Complete and duplicated BUSCOs \(D\)	   
+	0	Fragmented BUSCOs \(F\)			   
+	1012	Missing BUSCOs \(M\)			   
 	1013	Total BUSCO groups searched		   
 
 Dependencies and versions:
-	hmmsearch: 3.1
-	metaeuk: 6.a5d39d9
-	busco: 5.5.0
+	hmmsearch: [0-9\.\+]+
+	metaeuk: [0-9a-z\.\+]+
+	busco: [0-9\.\+]+