Mercurial > repos > iuc > halla
diff halla.xml @ 0:cafea02ae3e0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/halla commit 5cd01ad3808dff1ce4aae231706cbe2225079a04
| author | iuc |
|---|---|
| date | Wed, 05 Nov 2025 09:37:19 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/halla.xml Wed Nov 05 09:37:19 2025 +0000
@@ -0,0 +1,234 @@
+<tool id="halla" name="HAllA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>Hierarchical All-against-All association</description>
+    <macros>
+        <token name="@TOOL_VERSION@">0.8.40</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@PROFILE@">24.0</token>
+    </macros>
+    <xrefs>
+        <!-- NOTE(review): bio.tools ID assumed to be "halla"; confirm against the bio.tools registry -->
+        <xref type="bio.tools">halla</xref>
+    </xrefs>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">halla</requirement>
+    </requirements>
+    <version_command><![CDATA[halla --version]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        ## Element identifiers are user-supplied and are not sanitized by Galaxy, so
+        ## restrict them to a safe character set before quoting them on the command line.
+        #set $x_default_label = re.sub(r'[^\w\-. ]', '_', str($x.element_identifier))
+        #set $y_default_label = re.sub(r'[^\w\-. ]', '_', str($y.element_identifier))
+        halla
+        -x '$x'
+        --x_dataset_label
+        #if $x_dataset_label
+            '$x_dataset_label'
+        #else
+            '$x_default_label'
+        #end if
+        -y '$y'
+        --y_dataset_label
+        #if $y_dataset_label
+            '$y_dataset_label'
+        #else
+            '$y_default_label'
+        #end if
+        $alla
+        --max_freq_thresh $max_freq_thresh
+        #if $transform_data_funcs
+            --transform_data_funcs
+            #for $foo in $transform_data_funcs
+                $foo
+            #end for
+        #end if
+        $disable_bypass_discretization_if_possible
+        #if $discretize_func
+            --discretize_func $discretize_func
+        #end if
+        ## Compare as string: an optional integer set to 0 is falsy and would be dropped silently.
+        #if str($discretize_num_bins) != ''
+            --discretize_num_bins $discretize_num_bins
+        #end if
+        --pdist_metric $pdist_metric
+        $sim2dist_disable_abs
+        --linkage_method $linkage_method
+        --permute_func $permute_func
+        --permute_iters $permute_iters
+        $disable_permute_speedup
+        --fdr_alpha $fdr_alpha
+        --fdr_method $fdr_method
+        --fnr_thresh $fnr_thresh
+        --rank_cluster $rank_cluster
+        ## Compare as string so that a seed of 0 is still passed to halla.
+        #if str($seed) != ''
+            --seed $seed
+        #end if
+        --block_num $block_num
+        $hallagram
+        $diagnostic_plot
+        -o output
+        --num_threads "\${GALAXY_SLOTS:-4}"
+    ]]></command>
+    <inputs>
+        <param argument="-x" type="data" format="tabular" label="Dataset X" help="Tabular dataset with p features/rows and n samples/columns" />
+        <param argument="-y" type="data" format="tabular" label="Dataset Y" help="Tabular dataset with d features/rows and n samples/columns" />
+        <param argument="--alla" type="boolean" truevalue="--alla" falsevalue="" checked="false" label="Use AllA instead of HAllA" help="HAllA uses a hierarchical approach for block association discovery on top of an existing all-against-all (AllA) association matrix. Use this option to skip the block association step." />
+        <param argument="--max_freq_thresh" type="float" min="0" max="1" value="1" label="Maximum frequency" help="Features with a maximum frequency &gt;= the threshold will be removed" />
+        <param argument="--transform_data_funcs" type="select" optional="true" multiple="true" label="Continuous data transformation function">
+            <option value="zscore">zscore</option>
+            <option value="rank">rank</option>
+            <option value="quantile">quantile</option>
+        </param>
+
+        <param argument="--disable_bypass_discretization_if_possible" type="boolean" truevalue="--disable_bypass_discretization_if_possible" falsevalue="" checked="false" label="Discretize even if all features are continuous" help="By default discretization is bypassed if all features are continuous" />
+        <param argument="--discretize_func" type="select" optional="true" label="Discretization function">
+            <option value="quantile">quantile</option>
+            <option value="kmeans">kmeans</option>
+            <option value="uniform">uniform</option>
+            <option value="jenks">jenks</option>
+        </param>
+        <param argument="--discretize_num_bins" type="integer" min="0" value="" optional="true" label="Discretization - number of bins"/>
+
+        <param argument="--pdist_metric" type="select" label="Distance/similarity metric" help="Default: Spearman for continuous data. If there is at least one categorical variable in either dataset, HAllA will shift to Normalized Mutual Information (NMI) as an alternative similarity measure.">
+            <option value="spearman" selected="true">Spearman</option>
+            <option value="pearson">Pearson</option>
+            <option value="dcor">Distance correlation</option>
+            <option value="mi">mutual information</option>
+            <option value="nmi">normalized mutual information</option>
+            <option value="xicor">xi correlation</option>
+        </param>
+
+        <param argument="--sim2dist_disable_abs" type="boolean" truevalue="--sim2dist_disable_abs" falsevalue="" checked="false" label="Hierarchical clustering - disable setting similarity scores as absolute when computing distance" />
+        <param argument="--linkage_method" type="select" label="Hierarchical clustering linkage method" help="see help below">
+            <option value="single">single</option>
+            <option value="complete">complete</option>
+            <option value="average" selected="true">average</option>
+            <option value="weighted">weighted</option>
+            <option value="centroid">centroid</option>
+            <option value="median">median</option>
+            <option value="ward">ward</option>
+        </param>
+
+        <param argument="--permute_func" type="select" label="P-value approximation function" help="Function used to approximate p-values in the permutation test">
+            <option value="gpd">gpd</option>
+            <option value="ecdf">ecdf</option>
+        </param>
+        <param argument="--permute_iters" type="integer" min="0" value="1000" label="Number of iterations in the p-value permutation test"/>
+        <param argument="--disable_permute_speedup" type="boolean" truevalue="--disable_permute_speedup" falsevalue="" checked="false" label="Do not break early in the permutation test if p-value is insignificant"/>
+        <!-- \-\-force_permutations If turned on, force permutation testing -->
+
+        <param argument="--fdr_alpha" type="float" min="0" max="1" value="0.05" label="FDR threshold"/>
+        <param argument="--fdr_method" type="select" label="FDR method" help="see help below">
+            <option value="bonferroni">bonferroni: one-step correction</option>
+            <option value="sidak">sidak: one-step correction</option>
+            <option value="holm-sidak">holm-sidak: step-down method using Sidak adjustments</option>
+            <option value="holm">holm: step-down method using Bonferroni adjustments</option>
+            <option value="simes-hochberg">simes-hochberg: step-up method (independent)</option>
+            <option value="hommel">hommel: closed method based on Simes tests (non-negative)</option>
+            <option value="fdr_bh" selected="true">fdr_bh: Benjamini/Hochberg (non-negative)</option>
+            <option value="fdr_by">fdr_by: Benjamini/Yekutieli (negative)</option>
+            <option value="fdr_tsbh">fdr_tsbh: two stage fdr correction (non-negative)</option>
+            <option value="fdr_tsbky">fdr_tsbky: two stage fdr correction (non-negative)</option>
+        </param>
+        <param argument="--fnr_thresh" type="float" min="0" max="1" value="0.05" label="FNR threshold"/>
+        <param argument="--rank_cluster" type="select" label="Procedure to rank cluster using the p-values within the cluster">
+            <option value="best" selected="true">best</option>
+            <option value="average">average</option>
+        </param>
+        <param argument="--seed" type="integer" value="" optional="true" label="Randomization seed" />
+
+        <param argument="--hallagram" type="boolean" truevalue="--hallagram" falsevalue="--no_hallagram" checked="true" label="Generate hallagram" />
+        <param argument="--x_dataset_label" type="text" label="Hallagram/clustermap: label for X dataset" help="By default the dataset identifier is used"/>
+        <param argument="--y_dataset_label" type="text" label="Hallagram/clustermap: label for Y dataset" help="By default the dataset identifier is used"/>
+        <param argument="--block_num" type="integer" min="-1" value="-1" label="Number of top clusters in hallagram" help="-1: show all clusters"/>
+
+        <param argument="--diagnostic_plot" type="boolean" truevalue="--diagnostic_plot" falsevalue="" checked="false" label="Generates diagnostic plot" />
+    </inputs>
+    <outputs>
+        <data name="sig_clusters" format="tabular" from_work_dir="output/sig_clusters.txt" label="${tool.name} on ${on_string}: block associations"/>
+        <data name="all_associations" format="tabular" from_work_dir="output/all_associations.txt" label="${tool.name} on ${on_string}: all associations"/>
+        <!-- Only created when the hallagram option is enabled -->
+        <data name="hallagram_out" format="pdf" from_work_dir="output/hallagram.pdf" label="${tool.name} on ${on_string}: hallagram">
+            <filter>hallagram is True</filter>
+        </data>
+        <!-- PDFs named association_*.pdf are collected from output/diagnostic/ when diagnostic plots are requested -->
+        <collection name="diagnostic_plot_out" format="pdf" type="list" label="${tool.name} on ${on_string}: diagnostic plots">
+            <discover_datasets pattern="(?P&lt;designation&gt;association_.*)\.pdf" format="pdf" directory="output/diagnostic/" />
+            <filter>diagnostic_plot is True</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Defaults: both tables plus the hallagram (enabled by default) are produced -->
+        <test expect_num_outputs="3">
+            <param name="x" value="X_16_100.txt"/>
+            <param name="y" value="Y_16_100.txt"/>
+            <param name="seed" value="42"/>
+            <output name="sig_clusters" value="sig_clusters.tsv"/>
+            <output name="all_associations" value="all_associations.tsv"/>
+        </test>
+        <!-- Hallagram disabled, diagnostic plots enabled: both tables plus the diagnostic plot collection -->
+        <test expect_num_outputs="3">
+            <param name="x" value="X_16_100.txt"/>
+            <param name="y" value="Y_16_100.txt"/>
+            <param name="seed" value="42"/>
+            <param name="hallagram" value="false"/>
+            <param name="diagnostic_plot" value="true"/>
+            <output name="sig_clusters" value="sig_clusters.tsv"/>
+            <output name="all_associations" value="all_associations.tsv"/>
+            <output_collection name="diagnostic_plot_out" type="list" count="18">
+                <element name="association_1" value="association_1.pdf"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+HAllA (Hierarchical All-against-All association) is a method for finding blocks of associated features in high-dimensional datasets
+measured from a common set of samples. HAllA operates by
+
+1. optionally discretizing mixed continuous and categorical features to a uniform representation,
+2. hierarchically clustering each dataset separately to generate a pair of data hierarchies,
+3. performing all-against-all association testing between features across two datasets using robust measures of correlation,
+4. determining the statistical significance of individual associations by permutation testing, and
+5. iteratively subdividing the space of significant all-against-all correlations into blocks of densely associated features occurring as clusters in the original datasets.
+
+Tutorial https://github.com/biobakery/biobakery/wiki/halla
+
+Usage
+.....
+
+**Input**
+
+Data in scientific studies often come paired in the form of two high-dimensional datasets, where the dataset
+X (with p features/rows and n samples/columns) are assumed to be p predictor variables (or features) measured
+on n samples that give rise to d response variables contained in the dataset Y (with d features/rows and n samples/columns).
+Note that column i of X is sampled jointly with column i of Y, so that X and Y are aligned.
+
+**Output**
+
+HAllA reports significant associations between clusters of related features ("block associations").
+Each block association is characterized by a cluster from the first dataset, a cluster from the second dataset,
+and measures of statistical significance and effect size (p-value, q-value, and similarity score) for the cluster's
+component pairwise associations.
+
+- **block associations** which reports block associations between the two datasets' features
+- **all associations** which reports the pairwise similarity scores for all features across the two datasets
+- **hallagram** graphical representation of the discovered block associations
+- **diagnostic plots** (optional) lattice plot showing the pairwise associations between microbiome features and metadata for each significant cluster.
+
+**Notes**
+
+Details on the available options:
+
+- Hierarchical clustering linkage methods https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
+- FDR methods https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btac232</citation>
+    </citations>
+</tool>
\ No newline at end of file
