Mercurial > repos > bgruening > create_tool_recommendation_model
annotate main.py @ 6:e94dc7945639 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
| author | bgruening | 
|---|---|
| date | Sun, 16 Oct 2022 11:52:10 +0000 | 
| parents | 4f7e6612906b | 
| children | 
| rev | line source | 
|---|---|
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 1 """ | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 2 Predict next tools in the Galaxy workflows | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 3 using deep learning (Transformers) | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 4 """ | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 5 import argparse | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 6 import time | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 7 | 
| 5 
4f7e6612906b
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
 bgruening parents: 
4diff
changeset | 8 import extract_workflow_connections | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 9 import prepare_data | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 10 import train_transformer | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 11 | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 12 if __name__ == "__main__": | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 13 start_time = time.time() | 
| 2 
76251d1ccdcc
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
 bgruening parents: 
1diff
changeset | 14 | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 15 arg_parser = argparse.ArgumentParser() | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 16 arg_parser.add_argument("-wf", "--workflow_file", required=True, help="workflows tabular file") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 17 arg_parser.add_argument("-tu", "--tool_usage_file", required=True, help="tool usage file") | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 18 # data parameters | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 19 arg_parser.add_argument("-cd", "--cutoff_date", required=True, help="earliest date for taking tool usage") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 20 arg_parser.add_argument("-pl", "--maximum_path_length", required=True, help="maximum length of tool path") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 21 arg_parser.add_argument("-om", "--output_model", required=True, help="trained model path") | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 22 # neural network parameters | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 23 arg_parser.add_argument("-ti", "--n_train_iter", required=True, help="Number of training iterations run to create model") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 24 arg_parser.add_argument("-nhd", "--n_heads", required=True, help="Number of head in transformer's multi-head attention") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 25 arg_parser.add_argument("-ed", "--n_embed_dim", required=True, help="Embedding dimension") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 26 arg_parser.add_argument("-fd", "--n_feed_forward_dim", required=True, help="Feed forward network dimension") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 27 arg_parser.add_argument("-dt", "--dropout", required=True, help="Percentage of neurons to be dropped") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 28 arg_parser.add_argument("-lr", "--learning_rate", required=True, help="Learning rate") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 29 arg_parser.add_argument("-ts", "--te_share", required=True, help="Share of data to be used for testing") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 30 arg_parser.add_argument("-trbs", "--tr_batch_size", required=True, help="Train batch size") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 31 arg_parser.add_argument("-trlg", "--tr_logging_step", required=True, help="Train logging frequency") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 32 arg_parser.add_argument("-telg", "--te_logging_step", required=True, help="Test logging frequency") | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 33 arg_parser.add_argument("-tebs", "--te_batch_size", required=True, help="Test batch size") | 
| 2 
76251d1ccdcc
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
 bgruening parents: 
1diff
changeset | 34 | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 35 # get argument values | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 36 args = vars(arg_parser.parse_args()) | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 37 tool_usage_path = args["tool_usage_file"] | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 38 workflows_path = args["workflow_file"] | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 39 cutoff_date = args["cutoff_date"] | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 40 maximum_path_length = int(args["maximum_path_length"]) | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 41 | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 42 n_train_iter = int(args["n_train_iter"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 43 te_share = float(args["te_share"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 44 tr_batch_size = int(args["tr_batch_size"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 45 te_batch_size = int(args["te_batch_size"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 46 | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 47 n_heads = int(args["n_heads"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 48 feed_forward_dim = int(args["n_feed_forward_dim"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 49 embedding_dim = int(args["n_embed_dim"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 50 dropout = float(args["dropout"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 51 learning_rate = float(args["learning_rate"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 52 te_logging_step = int(args["te_logging_step"]) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 53 tr_logging_step = int(args["tr_logging_step"]) | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 54 trained_model_path = args["output_model"] | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 55 | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 56 config = { | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 57 'cutoff_date': cutoff_date, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 58 'maximum_path_length': maximum_path_length, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 59 'n_train_iter': n_train_iter, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 60 'n_heads': n_heads, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 61 'feed_forward_dim': feed_forward_dim, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 62 'embedding_dim': embedding_dim, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 63 'dropout': dropout, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 64 'learning_rate': learning_rate, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 65 'te_share': te_share, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 66 'te_logging_step': te_logging_step, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 67 'tr_logging_step': tr_logging_step, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 68 'tr_batch_size': tr_batch_size, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 69 'te_batch_size': te_batch_size, | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 70 'trained_model_path': trained_model_path | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 71 } | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 72 print("Preprocessing workflows...") | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 73 # Extract and process workflows | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 74 connections = extract_workflow_connections.ExtractWorkflowConnections() | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 75 # Process raw workflow and tool usage files | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 76 wf_dataframe, usage_df = connections.process_raw_files(workflows_path, tool_usage_path, config) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 77 workflow_paths, pub_conn = connections.read_tabular_file(wf_dataframe, config) | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 78 # Process the paths from workflows | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 79 print("Dividing data...") | 
| 6 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 80 data = prepare_data.PrepareData(maximum_path_length, te_share) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 81 train_data, train_labels, test_data, test_labels, f_dict, r_dict, c_wts, c_tools, tr_tool_freq = data.get_data_labels_matrices(workflow_paths, usage_df, cutoff_date, pub_conn) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 82 print(train_data.shape, train_labels.shape, test_data.shape, test_labels.shape) | 
| 
e94dc7945639
planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 24bab7a797f53fe4bcc668b18ee0326625486164
 bgruening parents: 
5diff
changeset | 83 train_transformer.create_enc_transformer(train_data, train_labels, test_data, test_labels, f_dict, r_dict, c_wts, c_tools, pub_conn, tr_tool_freq, config) | 
| 0 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 84 end_time = time.time() | 
| 
9bf25dbe00ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
 bgruening parents: diff
changeset | 85 print("Program finished in %s seconds" % str(end_time - start_time)) | 
