view association_rules.xml @ 3:01111436835d draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f031d8ddfb73cec24572648666ac44ee47f08aad
author bgruening
date Thu, 11 Aug 2022 09:38:31 +0000
parents af2624d5ab32
children
line wrap: on
line source

<tool id="sklearn_mlxtend_association_rules" name="Association rules" version="@VERSION@">
    <description>Extract frequent itemsets and generate association rules</description>
    <!-- Shared macro definitions are pulled in from main_macros.xml; the expand
         elements below refer to macros by name. NOTE(review): the macro bodies and
         the @VERSION@ token are presumably defined in that file (not visible here). -->
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <!-- Reports the same @VERSION@ token that is used in the tool's version attribute. -->
    <version_command>echo "@VERSION@"</version_command>
    <!-- Runs association_rules.py from the tool directory. The three path arguments
         (JSON parameter dump, input dataset, output dataset) are always passed; each
         numeric threshold is passed only when the user supplied a value.
         The guards compare the string form against the empty string instead of
         testing the parameter directly: a plain truthiness test treats an explicit 0
         as "not set" and would silently drop the option. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/association_rules.py'
            --inputs '$inputs'
            --infile '$infile'
            --outfile '$outfile'
            #if str($support) != ''
            --support '$support'
            #end if
            #if str($confidence) != ''
            --confidence '$confidence'
            #end if
            #if str($lift) != ''
            --lift '$lift'
            #end if
            #if str($conviction) != ''
            --conviction '$conviction'
            #end if
            #if str($length) != ''
            --length '$length'
            #end if
    ]]>
    </command>
    <!-- Asks Galaxy to write the tool parameters to a JSON file; its path is exposed
         to the command template as $inputs and handed to the script through the
         inputs option. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <!-- User-facing parameters. All numeric thresholds are optional; when one is left
         empty the corresponding option is omitted from the command line.
         header0 never appears on the command line. NOTE(review): presumably the
         script reads it from the JSON parameter dump; confirm in association_rules.py. -->
    <inputs>
        <param name="infile" type="data" format="tabular" label="Input file" help="One transaction per line, items separated by tabs."/>
        <param name="header0" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Does the dataset contain header?"/>
        <!-- Support and confidence are fractions, so they are bounded to [0, 1]. -->
        <param name="support" type="float" optional="true" min="0" max="1" label="Minimum support" help="Fraction of all transactions (0 to 1) in which an itemset must occur to be considered frequent. Leave empty to use the script default."/>
        <param name="confidence" type="float" optional="true" min="0" max="1" label="Minimum confidence" help="Lower bound (0 to 1) on the confidence of a reported rule. Leave empty to use the script default."/>
        <!-- Lift and conviction are non-negative and have no upper bound. -->
        <param name="lift" type="float" optional="true" min="0" label="Minimum lift" help="Lower bound on the lift of a reported rule. Leave empty to use the script default."/>
        <param name="conviction" type="float" optional="true" min="0" label="Minimum conviction" help="Lower bound on the conviction of a reported rule. Leave empty to use the script default."/>
        <param name="length" type="integer" optional="true" min="1" label="Maximum length" help="Largest number of items allowed in a frequent itemset. Leave empty to use the script default."/>
    </inputs>
    <!-- Single tabular result; its path is passed to the script as the outfile option. -->
    <outputs>
        <data name="outfile" format="tabular"/>
    </outputs>
    <!-- Four cases covering two input flavours (str / int, going by the file names)
         with header0 switched on and off. All cases use the same thresholds. -->
    <tests>
        <!-- "str" input, header0 enabled. -->
        <test>
            <param name="infile" value="mba_input_str_w.tabular" ftype="tabular"/>
            <param name="header0" value="true"/>
            <param name="support" value="0.5"/>
            <param name="confidence" value="0.5"/>
            <param name="lift" value="1.1"/>
            <param name="conviction" value="1.1"/>
            <param name="length" value="5"/>
            <!-- NOTE(review): this expected file is named mba_out_str.tabular while the
                 header-less "str" case below uses mba_output_str.tabular; confirm that
                 both files exist in test-data and that the difference is intentional. -->
            <output name="outfile" file="mba_out_str.tabular" ftype="tabular"/>
        </test>
        <!-- "int" input, header0 enabled. -->
        <test>
            <param name="infile" value="mba_input_int_w.tabular" ftype="tabular"/>
            <param name="header0" value="true"/>
            <param name="support" value="0.5"/>
            <param name="confidence" value="0.5"/>
            <param name="lift" value="1.1"/>
            <param name="conviction" value="1.1"/>
            <param name="length" value="5"/>
            <output name="outfile" file="mba_output_int.tabular" ftype="tabular"/>
        </test>
        <!-- "str" input, header0 disabled. -->
        <test>
            <param name="infile" value="mba_input_str_wo.tabular" ftype="tabular"/>
            <param name="header0" value="false"/>
            <param name="support" value="0.5"/>
            <param name="confidence" value="0.5"/>
            <param name="lift" value="1.1"/>
            <param name="conviction" value="1.1"/>
            <param name="length" value="5"/>
            <output name="outfile" file="mba_output_str.tabular" ftype="tabular"/>
        </test>
        <!-- "int" input, header0 disabled; expects the same output file as the
             "int" case with header0 enabled. -->
        <test>
            <param name="infile" value="mba_input_int_wo.tabular" ftype="tabular"/>
            <param name="header0" value="false"/>
            <param name="support" value="0.5"/>
            <param name="confidence" value="0.5"/>
            <param name="lift" value="1.1"/>
            <param name="conviction" value="1.1"/>
            <param name="length" value="5"/>
            <output name="outfile" file="mba_output_int.tabular" ftype="tabular"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Extract frequent itemsets and generate association rules

from mlxtend.frequent_patterns import fpgrowth

Extracts frequent itemsets for association rule mining. An itemset is considered as "frequent" if it 
meets a user-specified support threshold. For instance, if the support threshold is set to 0.5 (50%), 
a frequent itemset is defined as a set of items that occur together in at least 50% of all transactions 
in the database. The maximum length parameter restricts the result to itemsets that contain at most that many items.

from mlxtend.frequent_patterns import association_rules

Generates association rules from frequent itemsets. Rule generation is a common task in the mining of 
frequent patterns. An association rule is an implication expression of the form X->Y, where X and Y 
are disjoint itemsets. A more concrete example based on consumer behaviour would be {Diapers}->{Beer} 
suggesting that people who buy diapers are also likely to buy beer. To evaluate the "interest" of 
such an association rule, different metrics have been developed, e.g., confidence, lift, and conviction.

Arguments

infile: Each line in infile contains the (tab-separated) items of one transaction. Different lines/transactions
can have a different (varying) number of items.

Returns

outfile: A tab separated file, that has an association rule on each line, with various metrics listed.

    ]]></help>
    <expand macro="sklearn_citation"/>
</tool>