Mercurial > repos > ecology > ecoregion_cluster_estimate
comparison Nb_cluster.xml @ 1:e94a25eed489 draft
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 459ba1277acd7d8d4a02f90dbd7ff444bf8eac92
author | ecology |
---|---|
date | Wed, 24 Jan 2024 15:53:32 +0000 |
parents | 0f6542d0986e |
children | 001d7d101915 |
comparison
equal
deleted
inserted
replaced
0:0f6542d0986e | 1:e94a25eed489 |
---|---|
19 '$output2' | 19 '$output2' |
20 '$output3' | 20 '$output3' |
21 ]]> | 21 ]]> |
22 </command> | 22 </command> |
23 <inputs> | 23 <inputs> |
24 <param name="envfile" type="data" format="txt,csv,tabular" label="Environment file"/> | 24 <param name="envfile" type="data" format="tabular" label="Environment file (tabular format only)" help="See example below"/> |
25 <param name="taxafile" type="data" format="txt" label="Taxa selected file (List of taxa from TaxaSeeker tool)"/> | 25 <param name="taxafile" type="data" format="txt" label="Taxa selected file (File 'List of taxa' from TaxaSeeker tool)"/> |
26 <param name="predictionfile" type="data" format="txt" multiple="true" label="Prediction files"/> | 26 <param name="predictionfile" type="data" format="tabular" multiple="true" label="Prediction files"/> |
27 <param name="max_k" type="integer" value="2" min="1" label="Number of Cluster to test"/> | 27 <param name="max_k" type="integer" value="2" min="1" label="Number of Cluster to test"/> |
28 <param name="metric" type="select" label="What metric to use to calculate dissimilarities between observations ?"> | 28 <param name="metric" type="select" label="What metric to use to calculate dissimilarities between observations ?"> |
29 <option value="manhattan">manhattan</option> | 29 <option value="manhattan">manhattan</option> |
30 <option value="euclidean">euclidean</option> | 30 <option value="euclidean">euclidean</option> |
31 <option value="jaccard">jaccard</option> | 31 <option value="jaccard">jaccard</option> |
32 </param> | 32 </param> |
33 <param name="sample" type="integer" label="The number of samples to be drawn from the dataset" min="5" value="10"/> | 33 <param name="sample" type="integer" label="The number of samples to be drawn from the dataset" min="5" value="10"/> |
34 </inputs> | 34 </inputs> |
35 <outputs> | 35 <outputs> |
36 <data name="output1" from_work_dir="Indices_SIH.png" format="png" label="SIH index plot"/> | 36 <data name="output1" from_work_dir="Indices_SIH.png" format="png" label="SIH index plot"/> |
37 <data name="output2" from_work_dir="data_to_clus.tsv" format="tsv" label="Data to cluster"/> | 37 <data name="output2" from_work_dir="data_to_clus.tsv" format="tabular" label="Data to cluster"/> |
38 <data name="output3" from_work_dir="data_bio.tsv" format="tsv" label="Data.bio table "/> | 38 <data name="output3" from_work_dir="data_bio.tsv" format="tabular" label="Data.bio table "/> |
39 </outputs> | 39 </outputs> |
40 <tests> | 40 <tests> |
41 <test> | 41 <test> |
42 <param name="envfile" value="ceamarc_env.csv"/> | 42 <param name="envfile" value="ceamarc_env.tsv"/> |
43 <param name="taxafile" value="List_of_taxa.txt"/> | 43 <param name="taxafile" value="List_of_taxa.txt"/> |
44 <param name="predictionfile" value="1_brts_pred_ceamarc.txt"/> | 44 <param name="predictionfile" value="1_brts_pred_ceamarc.tsv"/> |
45 <param name='max_k' value="2"/> | 45 <param name='max_k' value="2"/> |
46 <param name='metric' value="manhattan"/> | 46 <param name='metric' value="manhattan"/> |
47 <param name='sample' value="10"/> | 47 <param name='sample' value="10"/> |
48 <output name='output1' value="SIH_index_plot.png"/> | 48 <output name='output1'> |
49 <assert_contents> | |
50 <has_size value="4297" delta="500"/> | |
51 </assert_contents> | |
52 </output> | |
49 <output name='output2' value="Data_to_cluster.tsv"/> | 53 <output name='output2' value="Data_to_cluster.tsv"/> |
50 <output name='output3' value="Data.bio_table.tsv"/> | 54 <output name='output3' value="Data.bio_table.tsv"/> |
51 </test> | 55 </test> |
52 </tests> | 56 </tests> |
53 <help><![CDATA[ | 57 <help><![CDATA[ |
69 | 73 |
70 - the metric used to calculate the dissimilarities between the observations: Manhattan, Euclidean and Jaccard | 74 - the metric used to calculate the dissimilarities between the observations: Manhattan, Euclidean and Jaccard |
71 | 75 |
72 - the sample size that will be used to perform clustering. Indeed, the clara function is used to cluster large data using a representative sample rather than the entire data set. This will speed up the clustering process and make the calculation more efficient. A fairly high value representative of the data is recommended. It is important to note that using too small a sample may result in loss of information compared to using the entire data set. | 76 - the sample size that will be used to perform clustering. Indeed, the clara function is used to cluster large data using a representative sample rather than the entire data set. This will speed up the clustering process and make the calculation more efficient. A fairly high value representative of the data is recommended. It is important to note that using too small a sample may result in loss of information compared to using the entire data set. |
73 | 77 |
74 The tool will produce three outputs. The first two are files that will be used in the rest of the workflow: a file containing four pieces of information, latitude, longitude, presence prediction and corresponding taxon, and a file containing the data to be partitioned. The third output corresponds to the main information of the tool, a graph presenting the value of the HIS index according to the number of clusters. The silhouette index provides a measure of the separation between clusters and the compactness within each cluster. The silhouette index ranges from -1 to 1. Values close to 1 indicate that objects are well grouped and separated from other clusters, while values close to -1 indicate that objects are poorly grouped and may be closer to other clusters. A value close to 0 indicates a situation where objects are located at the border between two neighboring clusters. | 78 The tool will produce three outputs. The first two are files that will be used in the rest of the workflow: a file containing four pieces of information, latitude, longitude, presence prediction and corresponding taxon, and a file containing the data to be partitioned. The third output corresponds to the main information of the tool, a graph presenting the value of the SIH index according to the number of clusters. The silhouette index provides a measure of the separation between clusters and the compactness within each cluster. The silhouette index ranges from -1 to 1. Values close to 1 indicate that objects are well grouped and separated from other clusters, while values close to -1 indicate that objects are poorly grouped and may be closer to other clusters. A value close to 0 indicates a situation where objects are located at the border between two neighboring clusters. |
75 | 79 |
76 **Example of the environmental file :** | 80 **Example of the environmental file :** |
77 | 81 |
78 +------+------+---------+------+--------------+-----+ | 82 +------+------+---------+------+--------------+-----+ |
79 | long | lat | Carbo | Grav | Maxbearing | ... | | 83 | long | lat | Carbo | Grav | Maxbearing | ... | |