Mercurial > repos > ecology > ecoregion_cluster_estimate

diff Nb_cluster.xml @ 1:e94a25eed489 draft
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 459ba1277acd7d8d4a02f90dbd7ff444bf8eac92
author: ecology
date: Wed, 24 Jan 2024 15:53:32 +0000
parents: 0f6542d0986e
children: 001d7d101915
--- a/Nb_cluster.xml	Wed Oct 18 09:59:06 2023 +0000
+++ b/Nb_cluster.xml	Wed Jan 24 15:53:32 2024 +0000
@@ -21,9 +21,9 @@
     ]]>
     </command>
     <inputs>
-      <param name="envfile" type="data" format="txt,csv,tabular" label="Environment file"/>
-      <param name="taxafile" type="data" format="txt" label="Taxa selected file (List of taxa from TaxaSeeker tool)"/>
-      <param name="predictionfile" type="data" format="txt" multiple="true" label="Prediction files"/>
+      <param name="envfile" type="data" format="tabular" label="Environment file (tabular format only)" help="See example below"/>
+      <param name="taxafile" type="data" format="txt" label="Taxa selected file (File 'List of taxa' from TaxaSeeker tool)"/>
+      <param name="predictionfile" type="data" format="tabular" multiple="true" label="Prediction files"/>
       <param name="max_k" type="integer" value="2" min="1" label="Number of Cluster to test"/>
       <param name="metric" type="select" label="What metric to use to calculate dissimilarities between observations ?">
              <option value="manhattan">manhattan</option>
@@ -34,18 +34,22 @@
     </inputs>
     <outputs>
       <data name="output1" from_work_dir="Indices_SIH.png" format="png" label="SIH index plot"/>
-      <data name="output2" from_work_dir="data_to_clus.tsv" format="tsv" label="Data to cluster"/>
-      <data name="output3" from_work_dir="data_bio.tsv" format="tsv" label="Data.bio table "/>
+      <data name="output2" from_work_dir="data_to_clus.tsv" format="tabular" label="Data to cluster"/>
+      <data name="output3" from_work_dir="data_bio.tsv" format="tabular" label="Data.bio table "/>
     </outputs>
     <tests>
         <test>
-            <param name="envfile" value="ceamarc_env.csv"/>
+            <param name="envfile" value="ceamarc_env.tsv"/>
             <param name="taxafile" value="List_of_taxa.txt"/>
-            <param name="predictionfile" value="1_brts_pred_ceamarc.txt"/>
+            <param name="predictionfile" value="1_brts_pred_ceamarc.tsv"/>
             <param name='max_k' value="2"/>
             <param name='metric' value="manhattan"/>
             <param name='sample' value="10"/>
-            <output name='output1' value="SIH_index_plot.png"/>
+            <output name='output1'>
+                <assert_contents>
+            	    <has_size value="4297" delta="500"/>
+            	</assert_contents>
+            </output>
             <output name='output2' value="Data_to_cluster.tsv"/>
             <output name='output3' value="Data.bio_table.tsv"/>
         </test>
@@ -71,7 +75,7 @@
 
 - the sample size that will be used to perform clustering. Indeed, the clara function is used to cluster large data sets using a representative sample rather than the entire data set. This will speed up the clustering process and make the calculation more efficient. A fairly high value representative of the data is recommended. It is important to note that using too small a sample may result in loss of information compared to using the entire data set.
 
-The tool will produce three outputs. The first two are files that will be used in the rest of the workflow: a file containing four pieces of information, latitude, longitude, presence prediction and corresponding taxon, and a file containing the data to be partitioned. The third output corresponds to the main information of the tool, a graph presenting the value of the HIS index according to the number of clusters. The silhouette index provides a measure of the separation between clusters and the compactness within each cluster. The silhouette index ranges from -1 to 1. Values close to 1 indicate that objects are well grouped and separated from other clusters, while values close to -1 indicate that objects are poorly grouped and may be closer to other clusters. A value close to 0 indicates a situation where objects are located at the border between two neighboring clusters.
+The tool will produce three outputs. The first two are files that will be used in the rest of the workflow: a file containing four pieces of information, latitude, longitude, presence prediction and corresponding taxon, and a file containing the data to be partitioned. The third output corresponds to the main information of the tool, a graph presenting the value of the SIH index according to the number of clusters. The silhouette index provides a measure of the separation between clusters and the compactness within each cluster. The silhouette index ranges from -1 to 1. Values close to 1 indicate that objects are well grouped and separated from other clusters, while values close to -1 indicate that objects are poorly grouped and may be closer to other clusters. A value close to 0 indicates a situation where objects are located at the border between two neighboring clusters.
 
 **Example of the environmental file :**
author	ecology
date	Wed, 24 Jan 2024 15:53:32 +0000
parents	0f6542d0986e
children	001d7d101915