Mercurial > repos > bgruening > sklearn_svm_classifier
annotate association_rules.py @ 21:14fa42b095c4 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
| author | bgruening | 
|---|---|
| date | Sat, 01 May 2021 01:53:41 +0000 | 
| parents | |
| children | b878e4cdd63a | 
| rev | line source | 
|---|---|
| 
21
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
1 import argparse | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
2 import json | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
3 import warnings | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
4 | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
5 import pandas as pd | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
6 from mlxtend.frequent_patterns import association_rules, fpgrowth | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
7 from mlxtend.preprocessing import TransactionEncoder | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
8 | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
9 | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
def main(inputs, infile, outfile, min_support=0.5, min_confidence=0.5,
         min_lift=1.0, min_conviction=1.0, max_length=None):
    """
    Mine association rules from a tab-separated transaction file.

    Every line of ``infile`` is treated as one transaction whose items are
    separated by tabs. Frequent itemsets are extracted with FP-growth,
    turned into association rules, filtered by the given thresholds, put
    into a deterministic order and written to ``outfile`` as a
    tab-separated table.

    Parameters
    ----------
    inputs : str
        File path to the JSON file holding the galaxy tool parameters.
        Only the ``header0`` entry is read here.

    infile : str
        File path of the tab-separated transactions, one transaction
        per line

    outfile : str
        File path the resulting rule table is written to

    min_support : float
        Minimum support passed to ``fpgrowth``

    min_confidence : float
        Minimum confidence a rule must reach

    min_lift : float
        Minimum lift a rule must reach

    min_conviction : float
        Minimum conviction a rule must reach

    max_length : int
        Maximum itemset length passed to ``fpgrowth`` as ``max_len``

    """
    warnings.simplefilter('ignore')

    with open(inputs, 'r') as param_handler:
        params = json.load(param_handler)

    # A truthy 'header0' means the first line of infile is a header row
    has_header = params['header0']

    with open(infile) as fp:
        lines = fp.read().splitlines()

    if has_header:
        lines = lines[1:]

    # One transaction per line, items separated by tabs
    dataset = [line.split("\t") for line in lines]

    # TransactionEncoder learns the unique labels in the dataset and transforms the
    # input dataset (a Python list of lists) into a one-hot encoded NumPy boolean array
    te = TransactionEncoder()
    te_ary = te.fit_transform(dataset)

    # Turn the encoded NumPy array into a DataFrame
    df = pd.DataFrame(te_ary, columns=te.columns_)

    # Extract frequent itemsets for association rule mining
    # use_colnames: Use DataFrames' column names in the returned DataFrame instead of column indices
    frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True, max_len=max_length)

    # Get association rules, with confidence larger than min_confidence
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

    # Keep only rules whose lift and conviction reach min_lift and min_conviction.
    # Copy the filtered frame so the column assignments below act on an
    # independent DataFrame rather than on a slice of the unfiltered one.
    keep = (rules['lift'] >= min_lift) & (rules['conviction'] >= min_conviction)
    rules = rules[keep].copy()

    # The remaining steps fix the order of the generated association rules,
    # so tests that rely on diff'ing the output against an expected output
    # can pass.

    # 1) Convert every frozenset in 'antecedents' and 'consequents' into a
    #    sorted list (more readable, and stable item order within a rule)
    rules['antecedents'] = rules['antecedents'].apply(sorted)
    rules['consequents'] = rules['consequents'].apply(sorted)

    # 2) Sort the rules on two temporary string columns, then drop them again
    sort_keys = ['ant_str', 'con_str']
    rules = (
        rules.assign(
            ant_str=rules['antecedents'].apply(" ".join),
            con_str=rules['consequents'].apply(" ".join),
        )
        .sort_values(by=sort_keys)
        .drop(columns=sort_keys)
        .reset_index(drop=True)
    )

    # Write association rules and metrics to file
    rules.to_csv(outfile, sep="\t", index=False)
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
100 | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
101 | 
| 
 
14fa42b095c4
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
 
bgruening 
parents:  
diff
changeset
 | 
if __name__ == '__main__':
    # Command-line entry point: parse the options and forward them to main().
    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    aparser.add_argument("-y", "--infile", dest="infile", required=True)
    aparser.add_argument("-o", "--outfile", dest="outfile", required=True)
    # Let argparse convert the numeric thresholds so that a malformed value
    # is reported as a usage error instead of an uncaught ValueError.
    aparser.add_argument("-s", "--support", dest="support", type=float, default=0.5)
    aparser.add_argument("-c", "--confidence", dest="confidence", type=float, default=0.5)
    aparser.add_argument("-l", "--lift", dest="lift", type=float, default=1.0)
    aparser.add_argument("-v", "--conviction", dest="conviction", type=float, default=1.0)
    aparser.add_argument("-t", "--length", dest="length", type=int, default=5)
    args = aparser.parse_args()

    main(args.inputs, args.infile, args.outfile,
         min_support=args.support, min_confidence=args.confidence,
         min_lift=args.lift, min_conviction=args.conviction,
         max_length=args.length)
