Mercurial > repos > onnodg > cdhit_analysis
diff cdhit_analysis.xml @ 1:ff68835adb2b draft
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
| author | onnodg |
|---|---|
| date | Mon, 20 Oct 2025 12:27:31 +0000 |
| parents | 00d56396b32a |
| children | 706b7acdb230 |
line wrap: on
line diff
--- a/cdhit_analysis.xml Tue Oct 14 09:09:46 2025 +0000 +++ b/cdhit_analysis.xml Mon Oct 20 12:27:31 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="cdhit_cluster_analysis" name="CD-HIT Cluster Analysis" version="1.0.0"> +<tool id="cdhit_cluster_analysis" name="CD-HIT Cluster Analysis" version="1.0.1"> <description>Analyze CD-HIT clustering results with taxonomic annotation</description> <requirements> @@ -14,19 +14,19 @@ --input_annotation '$input_annotation' #if $output_options.similarity_output: - --output_similarity_txt '$output_similarity_txt' - --output_similarity_plot '$output_similarity_plot' + --output_similarity_txt '$similarity_txt' + --output_similarity_plot '$similarity_plot' #end if #if $output_options.evalue_output: - --output_evalue_txt '$output_evalue_txt' - --output_evalue_plot '$output_evalue_plot' + --output_evalue_txt '$evalue_txt' + --output_evalue_plot '$evalue_plot' #end if #if $output_options.count_output: - --output_count '$output_count' + --output_count '$cluster_count' #end if #if $output_options.taxa_output: - --output_taxa_clusters '$output_taxa_clusters' - --output_taxa_processed '$output_taxa_processed' + --output_taxa_clusters '$cluster_taxa' + --output_taxa_processed '$processed_taxa' #end if --simi_plot_y_min '$plot_params.simi_plot_y_min' @@ -48,24 +48,24 @@ ]]></command> <inputs> - <param name="input_cluster" type="data" format="txt" label="CD-HIT cluster file (.clstr/.txt)" + <param name="input_cluster" type="data" format="txt" label="CD-HIT cluster file" help="Output cluster file from cd-hit-est" /> <param name="input_annotation" type="data" format="xlsx" - label="Annotation file" - help="Excel workfile with sequence annotations (header, evalue, taxa)" /> + label="Excel Annotations file" + help="Excel workfile with annotations per header" /> <section name="output_options" title="Output Options" expanded="true"> <param name="similarity_output" type="boolean" truevalue="true" falsevalue="false" - checked="true" label="Create similarity 
output" + checked="true" label="Create cluster similarity output" help="Generate similarity analysis and plots" /> <param name="evalue_output" type="boolean" truevalue="true" falsevalue="false" - checked="true" label="Create E-value output" + checked="true" label="Create cluster E-value output" help="Generate E-value analysis and plots" /> <param name="count_output" type="boolean" truevalue="true" falsevalue="false" - checked="true" label="Create count output" + checked="true" label="Create cluster count output" help="Generate read count summaries" /> <param name="taxa_output" type="boolean" truevalue="true" falsevalue="false" - checked="true" label="Create taxa output" + checked="true" label="Create taxa annotations output" help="Generate taxonomic analysis" /> </section> @@ -104,31 +104,31 @@ </inputs> <outputs> - <data name="output_similarity_txt" format="txt" label="Similarity data" > + <data name="similarity_txt" format="txt" label="Similarity data" > <filter>output_options['similarity_output']</filter> </data> - <data name="output_similarity_plot" format="png" label="Similarity plot" > + <data name="similarity_plot" format="png" label="Similarity plot" > <filter>output_options['similarity_output']</filter> </data> - <data name="output_evalue_txt" format="txt" label="E-value data" > + <data name="evalue_txt" format="txt" label="E-value data" > <filter>output_options['evalue_output']</filter> </data> - <data name="output_evalue_plot" format="png" label="E-value plot" > + <data name="evalue_plot" format="png" label="E-value plot" > <filter>output_options['evalue_output']</filter> </data> - <data name="output_count" format="txt" label="Count summary" > + <data name="cluster_count" format="txt" label="Count summary" > <filter>output_options['count_output']</filter> </data> - <data name="output_taxa_clusters" format="xlsx" label="Raw taxa per cluster" > + <data name="cluster_taxa" format="xlsx" label="Raw taxa per cluster" > 
<filter>output_options['taxa_output']</filter> </data> - <data name="output_taxa_processed" format="xlsx" label="Processed taxa" > + <data name="processed_taxa" format="xlsx" label="Processed taxa" > <filter>output_options['taxa_output']</filter> </data> </outputs> @@ -143,13 +143,13 @@ <param name="count_output" value="true" /> <param name="taxa_output" value="true" /> </section> - <output name="output_similarity_txt" file="sim_out.txt" /> - <output name="output_similarity_plot" file="sim_out.png" compare="sim_size"/> - <output name="output_evalue_txt" file="evalue_out.txt" /> - <output name="output_evalue_plot" file="evalue_out.png" compare="sim_size"/> - <output name="output_count" file="count_out.txt" /> - <output name="output_taxa_clusters" file="taxa_out.xlsx" decompress="true"/> - <output name="output_taxa_processed" file="processed.xlsx" decompress="true"/> + <output name="similarity_txt" file="sim_out.txt" /> + <output name="similarity_plot" file="sim_out.png" compare="sim_size"/> + <output name="evalue_txt" file="evalue_out.txt" /> + <output name="evalue_plot" file="evalue_out.png" compare="sim_size"/> + <output name="cluster_count" file="count_out.txt" /> + <output name="cluster_taxa" file="taxa_out.xlsx" decompress="true"/> + <output name="processed_taxa" file="processed.xlsx" decompress="true"/> </test> <test expect_num_outputs="7"> <param name="input_cluster" value="input2_test.clstr.txt" /> @@ -160,13 +160,13 @@ <param name="count_output" value="true" /> <param name="taxa_output" value="true" /> </section> - <output name="output_similarity_txt" file="test2_sim_out.txt" /> - <output name="output_similarity_plot" file="test2_sim_out.png" compare="sim_size"/> - <output name="output_evalue_txt" file="test2_evalue_out.txt" /> - <output name="output_evalue_plot" file="test2_evalue_out.png" compare="sim_size"/> - <output name="output_count" file="test_2count_out.txt" /> - <output name="output_taxa_clusters" file="test_2taxa_out.xlsx" decompress="true"/> - 
<output name="output_taxa_processed" file="test_2processed.xlsx" decompress="true"/> + <output name="similarity_txt" file="test2_sim_out.txt" /> + <output name="similarity_plot" file="test2_sim_out.png" compare="sim_size"/> + <output name="evalue_txt" file="test2_evalue_out.txt" /> + <output name="evalue_plot" file="test2_evalue_out.png" compare="sim_size"/> + <output name="cluster_count" file="test_2count_out.txt" /> + <output name="cluster_taxa" file="test_2taxa_out.xlsx" decompress="true"/> + <output name="processed_taxa" file="test_2processed.xlsx" decompress="true"/> </test> <test expect_num_outputs="5"> <param name="input_cluster" value="input2_test.clstr.txt" /> @@ -178,7 +178,7 @@ <param name="evalue_output" value="false" /> </section> <section name="processing_options"> - <param name="show_unnanotated_clusters" value="true"/> + <param name="show_unannotated_clusters" value="true"/> <param name="make_taxa_in_cluster_split" value="true"/> <param name="print_empty_files" value="true"/> </section> @@ -187,15 +187,15 @@ <param name="min_to_split" value="0.6"/> <param name="min_count_to_split" value="6"/> </section> - <section name="plot_params" title="Plot Parameters" expanded="false"> + <section name="plot_params"> <param name="simi_plot_y_min" value="0.4" /> <param name="simi_plot_y_max" value="0.4" /> </section> - <output name="output_similarity_txt" file="test2_sim_extra_out.txt" /> - <output name="output_similarity_plot" file="test2_sim_extra_out.png" compare="sim_size"/> - <output name="output_count" file="test_2count_extra_out.txt" /> - <output name="output_taxa_clusters" file="test_2taxa_extra_out.xlsx" decompress="true"/> - <output name="output_taxa_processed" file="test_2processed_extra.xlsx" decompress="true"/> + <output name="similarity_txt" file="test2_sim_extra_out.txt" /> + <output name="similarity_plot" file="test2_sim_extra_out.png" compare="sim_size"/> + <output name="cluster_count" file="test_2count_extra_out.txt" /> + <output 
name="cluster_taxa" file="test_2taxa_extra_out.xlsx" decompress="true"/> + <output name="processed_taxa" file="test_2processed_extra.xlsx" decompress="true"/> </test> </tests> @@ -212,10 +212,10 @@ **Output Options:** -- **Similarity output**: Creates similarity analysis with plots and text files showing intra-cluster similarity distributions -- **E-value output**: Creates E-value analysis with plots and text files showing E-value distributions -- **Count output**: Creates summary tables with annotated/unannotated read counts per cluster -- **Taxa output**: Creates taxonomic analysis determining the most likely taxa for each cluster +- **Cluster similarity output**: Creates similarity analysis with plots and text files showing intra-cluster similarity distributions +- **Cluster E-value output**: Creates E-value analysis with plots and text files showing E-value distributions +- **Cluster count output**: Creates summary tables with annotated/unannotated read counts per cluster +- **Taxa annotations output**: Creates taxonomic analysis determining the most likely taxa for each cluster **Parameters:** @@ -235,9 +235,23 @@ **Note**: The tool expects that sequence counts are included in the cluster file headers in the format "header(count)". +------------- + +.. class:: infomark + **Credits** -Authors = Onno de Gorter, 2025. + Based on a script by Nick Kortleven, translated, modified and wrapped by Onno de Gorter. -Developed for the New light on old remedies project, a PhD research by Anja Fischer +Developed for the New light on old remedies project, a PhD research by Anja Fischer. 
+ +Link to the project website: + +* https://ahm.uva.nl/funded-research-projects/new-lights-on-old-remedies/new-lights-on-old-remedies.html + ]]></help> + <creator> + <organization name="Naturalis Biodiversity Center" url="https://www.naturalis.nl/en/science" /> + <person givenName="Onno" familyName="de Gorter" url="https://github.com/Onnodg"/> + <person givenName="Nick" familyName="Kortleven" url="https://github.com/tombkingsts" /> + </creator> </tool> \ No newline at end of file
