view NSPDK_candidateClusters.xml @ 9:1b142e88d068 draft default tip

planemo upload for repository commit 89078a214cefd31d28da75ddebb21f546fba79df
author bgruening
date Tue, 18 Jun 2019 12:43:50 -0400
parents 90da74fd1c5c
line wrap: on
line source

<tool id="NSPDK_candidateClust" name="NSPDK_candidateClusters" version="" >
        <!-- Packages this tool depends on, each pinned to an exact version. -->
        <requirement
            type="package"
            version="0.6.0">graphclust-wrappers</requirement>
        <requirement
            type="package"
            version="9.2">nspdk</requirement>
        <!-- Exit-code rule with an open-ended range: applies to every exit code from 1 upwards. -->
        <exit_code range="1:" />

        ## Create the directory that receives the linked input datasets.
        mkdir ./SVECTOR &&

        ## Link every selected dataset into ./SVECTOR, named after its element
        ## identifier with a .gspan suffix. The destination is quoted so that an
        ## identifier containing spaces or shell metacharacters stays one argument.
        #for $ins in $data_svector:
            ln -f -s  '$ins' './SVECTOR/${ins.element_identifier}.gspan' &&
        #end for

        #if $iteration_num.iteration_num_selector:
        #end if
        <!-- Dataset inputs: one or more zip datasets (data_svector), one FASTA
             dataset (data_fasta) and one text dataset (data_names). -->
        <param name="data_svector" type="data" format="zip" multiple="True"/>
        <param name="data_fasta" type="data" format="fasta"/>
        <param name="data_names" type="data" format="txt"/>
        <!-- Chooses between a single run and a later iteration.
             true:  the user supplies the current iteration number (CI), three text
                    datasets (blacklist, final_partition_soft, fast_cluster_last_round)
                    and the blacklist hit overlap value.
             false: CI is a hidden parameter fixed to 1.
             Each <when> branch and the <conditional> itself are closed explicitly so
             that the parameters that follow are not nested inside a branch. -->
        <conditional name="iteration_num">
            <param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/>
            <when value="true">
                <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/>
                <param type="data" name="blacklist" format="txt" />
                <param type="data" name="final_partition_soft" format="txt" />
                <param type="data" name="fast_cluster_last_round" format="txt" />
                <param name="GLOBAL_hit_blacklist_overlap" type="float" value="0.2" size="5" label="Blacklist hit overlap" />
            </when>
            <when value="false">
                <param name="CI" type="hidden" value="1" size="5" label="Number of current iteration "/>
            </when>
        </conditional>
        <!-- Integer settings whose help strings point at the -R and -D options. -->
        <param name="max_rad" type="integer" value="3" size="5"
               label="maximum radius " help="-R"/>
        <param name="max_dist_relations" type="integer" value="3" size="5"
               label="maximum distance relations" help="-D"/>
        <!-- Boolean switches, all checked by default, plus the neighbour size factor.
             Each boolean yields its truevalue when checked and an empty string otherwise. -->
        <param name="noCache" truevalue="-no-cache" falsevalue="" checked="True" type="boolean"
                label="Deactivate caching of kernel value computation  (-no-cache)" help="to minimize memory usage"/>
        <!-- Label spelling corrected from "eccess" to "excess". -->
        <param name="ensf" type="integer" value="5" size="5" label="excess neighbour size factor" help="-ensf"/>
        <param name="usn" truevalue="-usn" falsevalue="" checked="True" type="boolean"
                label="Use shared neighbourhood to weight center density (-usn)" help="by default true"/>
        <param name="oc" truevalue="-oc" falsevalue="" checked="True" type="boolean"
                label=" flag to output clusters (-oc)" help="by default true"/>
        <!-- Integer settings for nearest neighbours, hash functions (start value,
             upper limit and per-iteration step) and the cluster count limit. -->
        <param name="knn" type="integer" value="20" size="5" label="Number of nearest neighbors" help="-knn num"/>
        <param name="nhf" type="integer" value="500" size="5" label="Number of hash functions " help="-nhf num"/>
        <param name="nspdk_nhf_max" type="integer" value="1000" size="5" label="Maximal number of hash functions " />
        <param name="nspdk_nhf_step" type="integer" value="25" size="5" label="Size of step for increasing hash functions " help="The number of hash functions is increased by this value after each iteration."/>
        <!-- Label spelling corrected from "Maxinum" to "Maximum". -->
        <param name="GLOBAL_num_clusters" type="integer" value="100" size="5" label="Maximum number of clusters " />
        <!-- Outputs kept only when "Multiple iterations" is off. Each one is collected
             from a fixed iteration-1 path under SVECTOR/. Every <data> element is
             closed explicitly so the outputs are siblings rather than nested. -->
        <data name="fast_cluster" format="txt" from_work_dir="SVECTOR/data.svector.1.fast_cluster" label="fast_cluster.1"  >
            <filter> iteration_num['iteration_num_selector'] is False</filter>
        </data>
        <data name="fast_cluster_sim" format="txt" from_work_dir="SVECTOR/data.svector.1.fast_cluster_sim" label="fast_cluster_sim.1" >
            <filter> iteration_num['iteration_num_selector'] is False </filter>
        </data>
        <data name="black_list" format="txt" from_work_dir="SVECTOR/data.svector.blacklist.1" label="blacklist.1" >
            <filter> iteration_num['iteration_num_selector'] is False </filter>
        </data>
        <!-- Outputs kept only when "Multiple iterations" is on; labels carry the
             current iteration number (CI). Every <data> element is closed explicitly
             so the outputs are siblings rather than nested.
             NOTE(review): the from_work_dir paths contain a "*" wildcard; confirm the
             target Galaxy version resolves wildcards there. -->
        <data name="fast_cluster_m" format="txt" from_work_dir="SVECTOR/data.svector.*.fast_cluster" label="fast_cluster.$iteration_num.CI"  >
            <filter> iteration_num['iteration_num_selector'] is True</filter>
        </data>
        <data name="fast_cluster_sim_m" format="txt" from_work_dir="SVECTOR/data.svector.*.fast_cluster_sim" label="fast_cluster_sim.$iteration_num.CI" >
            <filter> iteration_num['iteration_num_selector'] is True</filter>
        </data>
        <data name="black_list_m" format="txt" from_work_dir="SVECTOR/data.svector.blacklist.*" label="blacklist.$iteration_num.CI" >
            <filter> iteration_num['iteration_num_selector'] is True</filter>
        </data>
            <!-- Test inputs for the single-iteration case (iteration_num_selector = false). -->
            <param name="data_fasta" value="data.fasta"/>
            <param name="data_names" value="data.names"/>
            <!-- NOTE(review): value is empty here; confirm which test file is intended. -->
            <param name="data_svector" value=""  ftype="zip" />
            <!-- The conditional is closed explicitly so the remaining test parameters
                 are not nested inside it. -->
            <conditional name="iteration_num">
                <param name="iteration_num_selector" value="false"/>
            </conditional>
            <param name="noCache" value="-no-cache"/>
            <param name="ensf" value="5"/>
            <param name="oc" value="-oc"/>
            <param name="max_rad" value="3"/>
            <param name="max_dist_relations" value="3"/>
            <param name="usn" value="-usn"/>
            <param name="knn" value="20"/>
            <param name="nhf" value="500"/>
            <param name="nspdk_nhf_max" value="1000"/>
            <param name="nspdk_nhf_step" value="25"/>
            <param name="GLOBAL_num_clusters" value="100"/>
            <!-- Expected files for the three single-iteration outputs. -->
            <output name="fast_cluster"
                    file="SVECTOR/data.svector.1.fast_cluster"/>
            <output name="fast_cluster_sim"
                    file="SVECTOR/data.svector.1.fast_cluster_sim"/>
            <output name="black_list"
                    file="SVECTOR/data.svector.blacklist.1"/>

**What it does**

Computes global feature index and returns top dense sets.
The candidate clusters are chosen as the top ranking neighborhoods provided that the size of their overlap
is below a specified threshold.
For more information, see the *Fast neighborhood subgraph pairwise distance kernel* paper.


+ **-knn** : <num nearest neighbors> (default: 10)

+ **-otknn** : flag to output true (i.e. implies full kernel matrix evaluation) k-nearest neighbors (default: 0)

+ **-oc** : flag to output clusters (default: 0)

+ **-nhf** : <num hash functions> for the Locality Sensitive Hashing function (default: 250)

+ **-ensf** : <excess neighbour size factor> (default: 10) (0 to avoid trimming)

+ **-usn** : use shared neighbourhood to weight center density (default: 0)

        <!-- Publication to cite for this tool, referenced by DOI. -->
        <citation type="doi">10.1093/bioinformatics/bts224</citation>
        <citation type="bibtex">@inproceedings{costa2010fast,
        title={Fast neighborhood subgraph pairwise distance kernel},
        author={Costa, Fabrizio and De Grave, Kurt},
        booktitle={Proceedings of the 26th International Conference on Machine Learning},