Previous changeset 4:a16313517e55 (2022-02-02) Next changeset 6:2131d7dca455 (2023-02-27) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit f3a89af137b13715f9fb13383577aceb2c445ce6 |
modified:
customizemetadata.py formatoutput.py macros.xml metaphlan.xml test-data/test-db.json test-data/test-db/test-db.pkl |
added:
test-data/test-db/test-db.1.bt2l test-data/test-db/test-db.2.bt2l test-data/test-db/test-db.3.bt2l test-data/test-db/test-db.4.bt2l test-data/test-db/test-db.rev.1.bt2l test-data/test-db/test-db.rev.2.bt2l |
removed:
test-data/test-db/test-db.1.bt2 test-data/test-db/test-db.2.bt2 test-data/test-db/test-db.3.bt2 test-data/test-db/test-db.4.bt2 test-data/test-db/test-db.rev.1.bt2 test-data/test-db/test-db.rev.2.bt2 |
b |
diff -r a16313517e55 -r 6dee4abadccb customizemetadata.py --- a/customizemetadata.py Wed Feb 02 22:03:11 2022 +0000 +++ b/customizemetadata.py Sat Dec 03 10:43:21 2022 +0000 |
[ |
@@ -56,6 +56,7 @@ 'taxonomy': in_metadata['taxonomy'], 'merged_taxon': {} } + # transform merged_taxons tuple keys to string for k in in_metadata['merged_taxon']: n = ' , '.join(k) @@ -85,6 +86,13 @@ n = ' , '.split(k) out_metadata[n] = in_metadata['merged_taxon'][k] + # Ensure that there are 8 taxonomy levels (for compatibility between Metaphlan v3 and v4) + # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432) + # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432) + for k in out_metadata['taxonomy']: + if out_metadata['taxonomy'][k][0].count('|') == 6: + out_metadata['taxonomy'][k] = (out_metadata['taxonomy'][k][0] + '|', out_metadata['taxonomy'][k][1]) + # dump metadata to Pickle file with bz2.BZ2File(pkl_fp, 'w') as pkl_f: pickle.dump(out_metadata, pkl_f) |
b |
diff -r a16313517e55 -r 6dee4abadccb formatoutput.py --- a/formatoutput.py Wed Feb 02 22:03:11 2022 +0000 +++ b/formatoutput.py Sat Dec 03 10:43:21 2022 +0000 |
[ |
@@ -57,9 +57,11 @@ # skip headers if line.startswith("#"): continue - # skip UNKNOWN lines in Predicted taxon relative abundances - if "UNKNOWN" in line: + + # skip UNKNOWN (v3) or UNCLASSIFIED (v4) lines in predicted taxon relative abundances + if "UNKNOWN" in line or 'UNCLASSIFIED' in line: continue + # spit lines split_line = line[:-1].split('\t') taxo_n = split_line[0].split('|') |
b |
diff -r a16313517e55 -r 6dee4abadccb macros.xml --- a/macros.xml Wed Feb 02 22:03:11 2022 +0000 +++ b/macros.xml Sat Dec 03 10:43:21 2022 +0000 |
b |
@@ -1,6 +1,6 @@ <?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">3.0.14</token> + <token name="@TOOL_VERSION@">4.0.3</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">20.01</token> <xml name="edam_ontology"> |
b |
diff -r a16313517e55 -r 6dee4abadccb metaphlan.xml --- a/metaphlan.xml Wed Feb 02 22:03:11 2022 +0000 +++ b/metaphlan.xml Sat Dec 03 10:43:21 2022 +0000 |
b |
@@ -107,7 +107,7 @@ #if $inputs.db.db_selector == "history" mkdir 'ref_db' && -bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db' +bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db' && python '$__tool_directory__/customizemetadata.py' transform_json_to_pkl @@ -123,9 +123,15 @@ --read_min_len $inputs.in.read_min_len --bt2_ps '$inputs.in.mapping.bt2_ps' --min_mapq_val $inputs.in.mapping.min_mapq_val + #if $ext == "sam" + --nreads \$(cat '$file_path' | grep -c -v '^@') + #end if #else '$inputs.in.in' --input_type '$inputs.in.selector' + #if $inputs.in.selector == "sam" + --nreads \$(cat '$inputs.in.in' | grep -c -v '^@') + #end if #end if #if $inputs.db.db_selector == "cached" --bowtie2db '$inputs.db.cached_db.fields.path' @@ -334,7 +340,7 @@ <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue='' label="Use a species as representative for species groups?"/> - <param name="legacy_output" argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' + <param argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' label="Old MetaPhlAn2 two columns output?"/> <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue='' label="Report the profiling using the CAMI output format?"/> @@ -408,7 +414,7 @@ </section> <output name="output_file" ftype="tabular"> <assert_contents> - <has_text text="UNKNOWN"/> + <has_text text="UNCLASSIFIED"/> </assert_contents> </output> <output name="bowtie2out" ftype="tabular"> @@ -503,7 +509,8 @@ <output name="krona_output_file" ftype="tabular"> <assert_contents> <not_has_text text="k__Bacteria"/> - <has_n_lines n="0"/> + <has_n_lines n="1" delta="1"/> + <has_size value="1" delta="1"/> </assert_contents> </output> </test> @@ -1051,7 +1058,7 @@ ============ MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, -Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level. +Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level. MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: @@ -1077,8 +1084,8 @@ aggregated marker information. Such capability comes with several slightly different flavours and are a way to perform strain tracking and comparison across multiple samples. -Usually, MetaPhlAn is first ran with the default parameter for the type of analysis to profile the -species present in the community, and then a strain-level profiling can be performed to zoom-in into +Usually, MetaPhlAn is first ran with default parameters for the type of analysis to profile the +species present in the community, and then a strain-level profiling can be performed to zoom-in on specific species of interest. This operation can be performed quickly as it exploits the bowtie2out intermediate file saved during the execution of the default analysis type. @@ -1087,16 +1094,16 @@ Metaphlan takes as input either: -- one or several sequence files in Fasta, FastQ (compressed or not) +- one or several sequence files in Fasta, FastQ (whether compressed or not) - a BowTie2 produced SAM file - an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run -It also need the reference database, which can be locally installed or customized using the dedicated tools. +It also needs the reference database, which can be locally installed or customized using the dedicated tools. Outputs ======= -The main output file is a tab-separated file with the predicted taxon relative abundances. +The main output is a tab-separated file with the predicted taxon relative abundances. It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs. @@ -1106,7 +1113,7 @@ To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_. -.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage +.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage ]]></help> <expand macro="citations"/> |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db.json --- a/test-data/test-db.json Wed Feb 02 22:03:11 2022 +0000 +++ b/test-data/test-db.json Sat Dec 03 10:43:21 2022 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"markers": {"504553__GeneID:10498608": {"ext": ["GCA_000144505", "GCA_000144125", "GCA_000488875", "GCA_000178055", "GCA_001660965", "GCA_000144445", "GCA_003384145", "GCA_002572705", "GCA_001660935", "GCA_001750525", "GCA_000144775", "GCA_003390995", "GCA_001750535", "GCA_000008345", "GCA_003384705", "GCA_003384485", "GCA_002572745", "GCA_000144465", "GCA_000145375", "GCA_002775655", "GCA_001660855", "GCA_001660945", "GCA_000144875", "GCA_003426625", "GCA_000240055", "GCA_003426225", "GCA_003384285", "GCA_000144815", "GCA_000145095", "GCA_900102845", "GCA_001469595", "GCA_000145195", "GCA_003384365", "GCA_000145155", "GCA_003384255", "GCA_003384315", "GCA_000144345", "GCA_000240015", "GCA_000302515", "GCA_002831715", "GCA_000144285", "GCA_003426665", "GCA_001469635", "GCA_000144185", "GCA_002572835", "GCA_001660115", "GCA_000144325", "GCA_000144735", "GCA_000194825", "GCA_000376705", "GCA_000147145", "GCA_002572575", "GCA_000144795", "GCA_003384385", "GCA_000144895", "GCA_003425775", "GCA_003384345", "GCA_000231215", "GCA_003384555", "GCA_000735055", "GCA_000145575", "GCA_000144005", "GCA_003384195", "GCA_001469555", "GCA_003384585", "GCA_003384395", "GCA_000145455", "GCA_000144485", "GCA_003384495", "GCA_000342585", "GCA_003384445", "GCA_000144365", "GCA_000730485", "GCA_002861085", "GCA_002572615", "GCA_000144145", "GCA_002556485", "GCA_001481615", "GCA_000144245", "GCA_000144545", "GCA_000221125", "GCA_001469655", "GCA_003426685", "GCA_000144045", "GCA_002572665", "GCA_002572695", "GCA_002572655", "GCA_001750555", "GCA_003426255", "GCA_000145335", "GCA_002572775"], "score": 37.0, "clade": "t__Propionibacterium_phage_PAS50", "len": 663, "taxon": "k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Caudovirales|f__Siphoviridae|g__Pa6virus|s__Propionibacterium_virus_PAS50|t__Propionibacterium_phage_PAS50"}, "386414__D1VYE0__HMPREF9019_1663": {"clade": "s__Prevotella_timonensis", "ext": [], "len": 1065, "score": 0, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella|s__Prevotella_timonensis"}, "29394__H3NGH1__B5772_02200": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 972, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFY1__B8A41_07655": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 534, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NC06__B8A41_08715": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 1317, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDE2__B8A45_00965": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 699, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFL8__B8A45_06985": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 582, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDD1__B8A45_02325": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 480, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NE37__B8A45_05170": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 756, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NCU9__B8A45_08665": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 765, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "2'..b'terobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076675": ["2|1224|1236|91347|543|547|354276|", 4289814], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958695": ["2|1224|1236|91347|543|547|354276|", 4336076], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077255": ["2|1224|1236|91347|543|547|354276|", 4482974], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075885": ["2|1224|1236|91347|543|547|354276|", 4396160], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001487035": ["2|1224|1236|91347|543|547|354276|", 4279410], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077025": ["2|1224|1236|91347|543|547|354276|", 4303785], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000952555": ["2|1224|1236|91347|543|547|354276|", 4340464], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076755": ["2|1224|1236|91347|543|547|354276|", 4236759], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000783855": ["2|1224|1236|91347|543|547|354276|", 4188638], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022075": ["2|1224|1236|91347|543|547|354276|", 4412384], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022255": ["2|1224|1236|91347|543|547|354276|", 4385357], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075375": ["2|1224|1236|91347|543|547|354276|", 4164369], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075395": ["2|1224|1236|91347|543|547|354276|", 4013880], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_003261215": ["2|1224|1236|91347|543|547|354276|", 5016502], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075185": ["2|1224|1236|91347|543|547|354276|", 4392401], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_002192395": ["2|1224|1236|91347|543|547|354276|", 4556770], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958745": ["2|1224|1236|91347|543|547|354276|", 4382425], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001025055": ["2|1224|1236|91347|543|547|354276|", 4360879], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076455": ["2|1224|1236|91347|543|547|354276|", 4404432]}, "merged_taxon": {}}\n\\ No newline at end of file\n' |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.1.bt2 |
b |
Binary file test-data/test-db/test-db.1.bt2 has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.1.bt2l |
b |
Binary file test-data/test-db/test-db.1.bt2l has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.2.bt2 |
b |
Binary file test-data/test-db/test-db.2.bt2 has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.2.bt2l |
b |
Binary file test-data/test-db/test-db.2.bt2l has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.3.bt2 |
b |
Binary file test-data/test-db/test-db.3.bt2 has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.3.bt2l |
b |
Binary file test-data/test-db/test-db.3.bt2l has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.4.bt2 |
b |
Binary file test-data/test-db/test-db.4.bt2 has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.4.bt2l |
b |
Binary file test-data/test-db/test-db.4.bt2l has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.pkl |
b |
Binary file test-data/test-db/test-db.pkl has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.1.bt2 |
b |
Binary file test-data/test-db/test-db.rev.1.bt2 has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.1.bt2l |
b |
Binary file test-data/test-db/test-db.rev.1.bt2l has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.2.bt2 |
b |
Binary file test-data/test-db/test-db.rev.2.bt2 has changed |
b |
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.2.bt2l |
b |
Binary file test-data/test-db/test-db.rev.2.bt2l has changed |