Mercurial > repos > onnodg > cdhit_analysis
comparison cdhit_analysis.xml @ 4:e64af72e1b8f draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
| author | onnodg |
|---|---|
| date | Mon, 15 Dec 2025 16:44:40 +0000 |
| parents | c6981ea453ae |
| children |
comparison
equal
deleted
inserted
replaced
| 3:c6981ea453ae | 4:e64af72e1b8f |
|---|---|
| 1 <tool id="cdhit_cluster_analysis" name="CD-HIT Cluster Analysis" version="1.0.2"> | 1 <tool id="cdhit_cluster_analysis" name="CD-HIT Cluster Analysis" version="2.0.0"> |
| 2 <description>Analyze CD-HIT clustering results with taxonomic annotation</description> | 2 <description>Analyze CD-HIT clustering results with taxonomic annotation</description> |
| 3 | 3 |
| 4 <requirements> | 4 <requirements> |
| 5 <requirement type="package" version="3.12.3">python</requirement> | 5 <requirement type="package" version="3.12.3">python</requirement> |
| 6 <requirement type="package" version="3.10.6">matplotlib</requirement> | 6 <requirement type="package" version="3.10.6">matplotlib</requirement> |
| 10 | 10 |
| 11 <command detect_errors="exit_code"><![CDATA[ | 11 <command detect_errors="exit_code"><![CDATA[ |
| 12 bash '$__tool_directory__/cdhit_analysis.sh' | 12 bash '$__tool_directory__/cdhit_analysis.sh' |
| 13 --input_cluster '$input_cluster' | 13 --input_cluster '$input_cluster' |
| 14 --input_annotation '$input_annotation' | 14 --input_annotation '$input_annotation' |
| 15 | |
| 16 #if $output_options.similarity_output: | 15 #if $output_options.similarity_output: |
| 17 --output_similarity_txt '$similarity_txt' | 16 --output_similarity_txt '$similarity_txt' |
| 18 --output_similarity_plot '$similarity_plot' | 17 --output_similarity_plot '$similarity_plot' |
| 19 #end if | 18 #end if |
| 20 | 19 |
| 21 #if $output_options.evalue_output: | |
| 22 --output_evalue_txt '$evalue_txt' | |
| 23 --output_evalue_plot '$evalue_plot' | |
| 24 #end if | |
| 25 | |
| 26 #if $output_options.count_output: | 20 #if $output_options.count_output: |
| 27 --output_count '$cluster_count' | 21 --output_count '$cluster_count' |
| 28 #end if | 22 #end if |
| 29 | 23 |
| 30 #if $output_options.taxa_output: | 24 #if $output_options.taxa_output: |
| 31 --output_taxa_clusters '$cluster_taxa' | 25 --output_excel '$taxa_excel' |
| 32 --output_taxa_processed '$processed_taxa' | 26 #if $output_options.show_all: |
| 27 --output_taxa_clusters | |
| 28 #end if | |
| 29 #if $output_options.show_calculated: | |
| 30 --output_taxa_processed | |
| 31 #end if | |
| 33 #end if | 32 #end if |
| 33 --log_file '$log_file' | |
| 34 | 34 |
| 35 --simi_plot_y_min '$plot_params.simi_plot_y_min' | 35 --simi_plot_y_min '$plot_params.simi_plot_y_min' |
| 36 --simi_plot_y_max '$plot_params.simi_plot_y_max' | 36 --simi_plot_y_max '$plot_params.simi_plot_y_max' |
| 37 | 37 --min_cluster_support '$taxa_params.min_cluster_support' |
| 38 --uncertain_taxa_use_ratio '$taxa_params.uncertain_taxa_use_ratio' | 38 --uncertain_taxa_use_ratio '$taxa_params.uncertain_taxa_use_ratio' |
| 39 --min_to_split '$taxa_params.min_to_split' | 39 --min_to_split '$taxa_params.min_to_split' |
| 40 --min_count_to_split '$taxa_params.min_count_to_split' | 40 --min_count_to_split '$taxa_params.min_count_to_split' |
| 41 | 41 |
| 42 #if $processing_options.show_unannotated_clusters: | |
| 43 --show_unannotated_clusters | |
| 44 #end if | |
| 45 | |
| 46 #if $processing_options.make_taxa_in_cluster_split: | |
| 47 --make_taxa_in_cluster_split | |
| 48 #end if | |
| 49 | |
| 50 #if $processing_options.print_empty_files: | |
| 51 --print_empty_files | |
| 52 #end if | |
| 53 ]]></command> | 42 ]]></command> |
| 54 | 43 |
| 55 <inputs> | 44 <inputs> |
| 56 <param name="input_cluster" type="data" format="txt" label="CD-HIT cluster file" | 45 <param name="input_cluster" type="data" format="txt" label="CD-HIT cluster file" |
| 57 help="Output cluster file from cd-hit-est" /> | 46 help="Output cluster file from cd-hit-est" /> |
| 59 label="Excel Annotations file" | 48 label="Excel Annotations file" |
| 60 help="Excel workfile with annotations per header" /> | 49 help="Excel workfile with annotations per header" /> |
| 61 | 50 |
| 62 <section name="output_options" title="Output Options" expanded="true"> | 51 <section name="output_options" title="Output Options" expanded="true"> |
| 63 <param name="similarity_output" type="boolean" truevalue="true" falsevalue="false" | 52 <param name="similarity_output" type="boolean" truevalue="true" falsevalue="false" |
| 64 checked="true" label="Create cluster similarity output" | 53 checked="false" label="Create cluster similarity output" |
| 65 help="Generate similarity analysis and plots" /> | 54 help="Generate similarity analysis and plots" /> |
| 66 <param name="evalue_output" type="boolean" truevalue="true" falsevalue="false" | |
| 67 checked="true" label="Create cluster E-value output" | |
| 68 help="Generate E-value analysis and plots" /> | |
| 69 <param name="count_output" type="boolean" truevalue="true" falsevalue="false" | 55 <param name="count_output" type="boolean" truevalue="true" falsevalue="false" |
| 70 checked="true" label="Create cluster count output" | 56 checked="false" label="Create cluster count output" |
| 71 help="Generate read count summaries" /> | 57 help="Generate read count summaries" /> |
| 72 <param name="taxa_output" type="boolean" truevalue="true" falsevalue="false" | 58 <param name="taxa_output" type="boolean" truevalue="true" falsevalue="false" |
| 73 checked="true" label="Create taxa annotations output" | 59 checked="false" label="Create taxa annotations output" |
| 74 help="Generate taxonomic analysis" /> | 60 help="Generate taxonomic analysis" /> |
| 61 <param name="show_all" type="boolean" truevalue="true" falsevalue="false" | |
| 62 checked="false" label="Show all annotations per cluster" | |
| 63 help="Output all annotations found per cluster in the excel file" /> | |
| 64 <param name="show_calculated" type="boolean" truevalue="true" falsevalue="false" | |
| 65 checked="false" label="Show calculated annotations per cluster" | |
| 66 help="Output calculated annotations per cluster in the excel file" /> | |
| 75 </section> | 67 </section> |
| 76 | 68 |
| 77 <section name="plot_params" title="Plot Parameters" expanded="false"> | 69 <section name="plot_params" title="Plot Parameters" expanded="false"> |
| 78 <param name="simi_plot_y_min" type="float" value="95.0" min="0" max="100" | 70 <param name="simi_plot_y_min" type="float" value="95.0" min="0" max="100" |
| 79 label="Similarity plot Y-axis minimum" | 71 label="Similarity plot Y-axis minimum" |
| 85 | 77 |
| 86 <section name="taxa_params" title="Taxonomic Analysis Parameters" expanded="false"> | 78 <section name="taxa_params" title="Taxonomic Analysis Parameters" expanded="false"> |
| 87 <param name="uncertain_taxa_use_ratio" type="float" value="0.5" min="0" max="1" | 79 <param name="uncertain_taxa_use_ratio" type="float" value="0.5" min="0" max="1" |
| 88 label="Uncertain taxa ratio" | 80 label="Uncertain taxa ratio" |
| 89 help="Ratio at which uncertain taxa count toward the correct taxa" /> | 81 help="Ratio at which uncertain taxa count toward the correct taxa" /> |
| 90 <param name="min_to_split" type="float" value="0.45" min="0" max="1" | 82 <param name="min_to_split" type="float" value="0.45" min="0" max="0.5" |
| 91 label="Minimum percentage to split" | 83 label="Minimum percentage to split" |
| 92 help="Minimum percentage for taxonomic split" /> | 84 help="Minimum percentage the second most abundant taxon has to be for taxonomic split" /> |
| 93 <param name="min_count_to_split" type="integer" value="10" min="1" | 85 <param name="min_count_to_split" type="integer" value="10" min="1" |
| 94 label="Minimum count to split" | 86 label="Minimum count to split" |
| 95 help="Minimum count for taxonomic split" /> | 87 help="Minimum count for taxonomic split within clusters" /> |
| 96 </section> | 88 <param name="min_cluster_support" type="integer" value="1" min="1" |
| 97 | 89 label="Minimum cluster size" |
| 98 <section name="processing_options" title="Processing Options" expanded="false"> | 90 help="Clusters are ignored if they are smaller than this number" /> |
| 99 <param name="show_unannotated_clusters" type="boolean" truevalue="true" falsevalue="false" | |
| 100 checked="false" label="Show unannotated clusters" | |
| 101 help="Include unannotated clusters in output" /> | |
| 102 <param name="make_taxa_in_cluster_split" type="boolean" truevalue="true" falsevalue="false" | |
| 103 checked="false" label="Split clusters with multiple taxa" | |
| 104 help="Split clusters containing multiple taxa instead of marking as uncertain" /> | |
| 105 <param name="print_empty_files" type="boolean" truevalue="true" falsevalue="false" | |
| 106 checked="false" label="Print empty file messages" | |
| 107 help="Print messages about empty annotation files" /> | |
| 108 </section> | 91 </section> |
| 109 </inputs> | 92 </inputs> |
| 110 | 93 |
| 111 <outputs> | 94 <outputs> |
| 112 <data name="similarity_txt" format="txt" label="Similarity data" > | 95 <data name="similarity_txt" format="txt" label="Similarity data" > |
| 113 <filter>output_options['similarity_output']</filter> | 96 <filter>output_options['similarity_output']</filter> |
| 114 </data> | 97 </data> |
| 115 | |
| 116 <data name="similarity_plot" format="png" label="Similarity plot" > | 98 <data name="similarity_plot" format="png" label="Similarity plot" > |
| 117 <filter>output_options['similarity_output']</filter> | 99 <filter>output_options['similarity_output']</filter> |
| 118 </data> | 100 </data> |
| 119 | |
| 120 <data name="evalue_txt" format="txt" label="E-value data" > | |
| 121 <filter>output_options['evalue_output']</filter> | |
| 122 </data> | |
| 123 | |
| 124 <data name="evalue_plot" format="png" label="E-value plot" > | |
| 125 <filter>output_options['evalue_output']</filter> | |
| 126 </data> | |
| 127 | |
| 128 <data name="cluster_count" format="txt" label="Count summary" > | 101 <data name="cluster_count" format="txt" label="Count summary" > |
| 129 <filter>output_options['count_output']</filter> | 102 <filter>output_options['count_output']</filter> |
| 130 </data> | 103 </data> |
| 131 | 104 <data name="taxa_excel" format="xlsx" label="Taxon output per cluster" > |
| 132 <data name="cluster_taxa" format="xlsx" label="Raw taxa per cluster" > | |
| 133 <filter>output_options['taxa_output']</filter> | 105 <filter>output_options['taxa_output']</filter> |
| 134 </data> | 106 </data> |
| 135 | 107 <data name="log_file" format="txt" label="Log file"/> |
| 136 <data name="processed_taxa" format="xlsx" label="Processed taxa" > | |
| 137 <filter>output_options['taxa_output']</filter> | |
| 138 </data> | |
| 139 </outputs> | 108 </outputs> |
| 140 | 109 |
| 141 <tests> | 110 <tests> |
| 142 <test expect_num_outputs="7"> | |
| 143 <param name="input_cluster" value="29-test.clstr.txt" /> | |
| 144 <param name="input_annotation" value="header_anno_29_test.xlsx" /> | |
| 145 <section name="output_options"> | |
| 146 <param name="similarity_output" value="true" /> | |
| 147 <param name="evalue_output" value="true" /> | |
| 148 <param name="count_output" value="true" /> | |
| 149 <param name="taxa_output" value="true" /> | |
| 150 </section> | |
| 151 <output name="similarity_txt" file="sim_out.txt" /> | |
| 152 <output name="similarity_plot" file="sim_out.png" compare="sim_size"/> | |
| 153 <output name="evalue_txt" file="evalue_out.txt" /> | |
| 154 <output name="evalue_plot" file="evalue_out.png" compare="sim_size"/> | |
| 155 <output name="cluster_count" file="count_out.txt" /> | |
| 156 <output name="cluster_taxa" file="taxa_out.xlsx" decompress="true"/> | |
| 157 <output name="processed_taxa" file="processed.xlsx" decompress="true"/> | |
| 158 </test> | |
| 159 <test expect_num_outputs="7"> | |
| 160 <param name="input_cluster" value="input2_test.clstr.txt" /> | |
| 161 <param name="input_annotation" value="header_anno_excel.xlsx" /> | |
| 162 <section name="output_options"> | |
| 163 <param name="similarity_output" value="true" /> | |
| 164 <param name="evalue_output" value="true" /> | |
| 165 <param name="count_output" value="true" /> | |
| 166 <param name="taxa_output" value="true" /> | |
| 167 </section> | |
| 168 <output name="similarity_txt" file="test2_sim_out.txt" /> | |
| 169 <output name="similarity_plot" file="test2_sim_out.png" compare="sim_size"/> | |
| 170 <output name="evalue_txt" file="test2_evalue_out.txt" /> | |
| 171 <output name="evalue_plot" file="test2_evalue_out.png" compare="sim_size"/> | |
| 172 <output name="cluster_count" file="test_2count_out.txt" /> | |
| 173 <output name="cluster_taxa" file="test_2taxa_out.xlsx" decompress="true"/> | |
| 174 <output name="processed_taxa" file="test_2processed.xlsx" decompress="true"/> | |
| 175 </test> | |
| 176 <test expect_num_outputs="5"> | 111 <test expect_num_outputs="5"> |
| 177 <param name="input_cluster" value="input2_test.clstr.txt" /> | 112 <param name="input_cluster" value="prev_anno.txt" /> |
| 178 <param name="input_annotation" value="header_anno_excel.xlsx" /> | 113 <param name="input_annotation" value="prev4.xlsx" /> |
| 179 <section name="output_options"> | 114 <section name="output_options"> |
| 180 <param name="similarity_output" value="true" /> | 115 <param name="similarity_output" value="true" /> |
| 181 <param name="count_output" value="true" /> | 116 <param name="count_output" value="true" /> |
| 182 <param name="taxa_output" value="true" /> | 117 <param name="taxa_output" value="true" /> |
| 183 <param name="evalue_output" value="false" /> | 118 <param name="show_all" value="true" /> |
| 184 </section> | 119 <param name="show_calculated" value="true" /> |
| 185 <section name="processing_options"> | |
| 186 <param name="show_unannotated_clusters" value="true"/> | |
| 187 <param name="make_taxa_in_cluster_split" value="true"/> | |
| 188 <param name="print_empty_files" value="true"/> | |
| 189 </section> | 120 </section> |
| 190 <section name="taxa_params"> | 121 <section name="taxa_params"> |
| 191 <param name="uncertain_taxa_use_ratio" value="0.6"/> | 122 <param name="uncertain_taxa_use_ratio" value="0.5" /> |
| 192 <param name="min_to_split" value="0.6"/> | 123 <param name="min_to_split" value="0.45" /> |
| 193 <param name="min_count_to_split" value="6"/> | 124 <param name="min_count_to_split" value="10" /> |
| 125 <param name="min_cluster_support" value="1" /> | |
| 194 </section> | 126 </section> |
| 195 <section name="plot_params"> | 127 <section name="plot_params"> |
| 196 <param name="simi_plot_y_min" value="0.4" /> | 128 <param name="simi_plot_y_min" value="95" /> |
| 197 <param name="simi_plot_y_max" value="0.4" /> | 129 <param name="simi_plot_y_max" value="100" /> |
| 198 </section> | 130 </section> |
| 199 <output name="similarity_txt" file="test2_sim_extra_out.txt" /> | 131 <output name="log_file" file="test1_logs.txt"/> |
| 200 <output name="similarity_plot" file="test2_sim_extra_out.png" compare="sim_size"/> | 132 <output name="similarity_txt" file="test1_similarity.txt" /> |
| 201 <output name="cluster_count" file="test_2count_extra_out.txt" /> | 133 <output name="similarity_plot" file="test1_similarity.png" compare="sim_size" /> |
| 202 <output name="cluster_taxa" file="test_2taxa_extra_out.xlsx" decompress="true"/> | 134 <output name="cluster_count" file="test1_summary.txt" /> |
| 203 <output name="processed_taxa" file="test_2processed_extra.xlsx" decompress="true"/> | 135 <output name="taxa_excel" file="test1_un_report.xlsx" decompress="true" /> |
| 136 </test> | |
| 137 <test expect_num_outputs="5"> | |
| 138 <param name="input_cluster" value="test2_clusters.txt" /> | |
| 139 <param name="input_annotation" value="test2_annotations.xlsx" /> | |
| 140 <section name="output_options"> | |
| 141 <param name="similarity_output" value="true" /> | |
| 142 <param name="count_output" value="true" /> | |
| 143 <param name="taxa_output" value="true" /> | |
| 144 <param name="show_all" value="true" /> | |
| 145 <param name="show_calculated" value="true" /> | |
| 146 </section> | |
| 147 <section name="taxa_params"> | |
| 148 <param name="uncertain_taxa_use_ratio" value="0.5" /> | |
| 149 <param name="min_to_split" value="0.45" /> | |
| 150 <param name="min_count_to_split" value="10" /> | |
| 151 <param name="min_cluster_support" value="1" /> | |
| 152 </section> | |
| 153 <section name="plot_params"> | |
| 154 <param name="simi_plot_y_min" value="95" /> | |
| 155 <param name="simi_plot_y_max" value="100" /> | |
| 156 </section> | |
| 157 <output name="log_file" file="test2_logs.txt"/> | |
| 158 <output name="similarity_txt" file="test2_similarity.txt" /> | |
| 159 <output name="similarity_plot" file="test2_similarity.png" compare="sim_size" /> | |
| 160 <output name="cluster_count" file="test2_summary.txt" /> | |
| 161 <output name="taxa_excel" file="test2_un_report.xlsx" decompress="true" /> | |
| 162 </test> | |
| 163 <test expect_num_outputs="3"> | |
| 164 <param name="input_cluster" value="test2_clusters.txt" /> | |
| 165 <param name="input_annotation" value="test2_annotations.xlsx" /> | |
| 166 <section name="output_options"> | |
| 167 <param name="count_output" value="true" /> | |
| 168 <param name="taxa_output" value="true" /> | |
| 169 <param name="show_all" value="true" /> | |
| 170 <param name="show_calculated" value="false" /> | |
| 171 </section> | |
| 172 <section name="taxa_params"> | |
| 173 <param name="uncertain_taxa_use_ratio" value="0.2" /> | |
| 174 <param name="min_to_split" value="0.1" /> | |
| 175 <param name="min_count_to_split" value="3" /> | |
| 176 <param name="min_cluster_support" value="4" /> | |
| 177 </section> | |
| 178 <section name="plot_params"> | |
| 179 <param name="simi_plot_y_min" value="95" /> | |
| 180 <param name="simi_plot_y_max" value="100" /> | |
| 181 </section> | |
| 182 <output name="log_file" file="test3_logs.txt"/> | |
| 183 <output name="cluster_count" file="test3_summary.txt" /> | |
| 184 <output name="taxa_excel" file="test3_un_report.xlsx" decompress="true" /> | |
| 204 </test> | 185 </test> |
| 205 </tests> | 186 </tests> |
| 206 | 187 |
| 207 <help><