annotate spec2vec_training_wrapper.py @ 0:e1e22ada831e draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
author recetox
date Thu, 05 Jan 2023 10:08:12 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
1 #!/usr/bin/env python
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
2
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
3 import argparse
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
4 import sys
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
5
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
6 from matchms.importing import load_from_mgf, load_from_msp
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
7 from spec2vec import SpectrumDocument
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
8 from spec2vec.model_building import train_new_word2vec_model
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
9 from spec2vec.serialization import export_model
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
10
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
11
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
def read_spectra(spectra_file, file_format):
    """Load spectra from ``spectra_file`` using the matchms loader for its format.

    Args:
        spectra_file: Path to the spectra file; passed unchanged to the loader.
        file_format: Format identifier, either ``"mgf"`` or ``"msp"``.

    Returns:
        Whatever the selected matchms loader (``load_from_mgf`` or
        ``load_from_msp``) returns for the file.

    Raises:
        NotImplementedError: If ``file_format`` is neither ``"mgf"`` nor ``"msp"``.
    """
    # Guard clauses: exactly one loader matches a supported format.
    if file_format == "msp":
        return load_from_msp(spectra_file)
    if file_format == "mgf":
        return load_from_mgf(spectra_file)

    raise NotImplementedError(f"Unsupported file format: {file_format}.")
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
19
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
20
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
def parse_checkpoints_input(checkpoints_input):
    """Parse a comma-separated string of epoch numbers into a list of ints.

    Spaces are ignored and duplicate values are collapsed. The result is
    sorted in ascending order so the output is deterministic.

    Args:
        checkpoints_input: String such as ``"5, 10,15"``.

    Returns:
        Sorted list of unique integer checkpoint epochs.

    Raises:
        ValueError: If any comma-separated token is not a valid integer
            (this includes empty tokens, e.g. from a trailing comma).
    """
    tokens = checkpoints_input.replace(" ", "").split(",")
    try:
        # Convert eagerly: a lazy ``map`` would defer the conversion (and any
        # ValueError) until after this ``try`` block has already been left.
        checkpoints = {int(token) for token in tokens}
    except ValueError as err:
        raise ValueError("Checkpoint values must be integers.") from err
    return sorted(checkpoints)
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
28
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
29
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
def _str_to_bool(value):
    """Convert a command-line token to a bool (for use as argparse ``type``).

    ``type=bool`` just calls ``bool()`` on the raw string, so every non-empty
    token -- including "False" and "0" -- would be parsed as True.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays False, matching what ``bool("")`` gave before.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def main(argv):
    """Train a spec2vec model from a spectra file and export it.

    Parses the command line, loads the spectra, converts them to
    ``SpectrumDocument`` objects, trains a word2vec model with the requested
    hyperparameters, optionally saves it via ``model.save`` and finally
    exports it with ``export_model``.

    Args:
        argv: List of command-line arguments without the program name
            (e.g. ``sys.argv[1:]``).
    """
    parser = argparse.ArgumentParser(description="Train a spec2vec model.")

    # Input data
    parser.add_argument("--spectra_filename", type=str, help="Path to a file containing spectra.")
    parser.add_argument("--spectra_fileformat", type=str, help="Spectra file format.")

    # Training parameters
    parser.add_argument("--epochs", type=int, default=0, help="Number of epochs to train the model.")
    parser.add_argument("--checkpoints", type=str, default=None, help="Epochs after which to save the model.")

    # Hyperparameters
    parser.add_argument("--vector_size", type=int, default=100, help="Dimensionality of the feature vectors.")
    parser.add_argument("--alpha", type=float, default=0.025, help="The initial learning rate.")
    parser.add_argument("--window", type=int, default=5, help="The maximum distance between the current and predicted peak within a spectrum.")
    parser.add_argument("--min_count", type=int, default=5, help="Ignores all peaks with total frequency lower than this.")
    parser.add_argument("--sample", type=float, default=0.001, help="The threshold for configuring which higher-frequency peaks are randomly downsampled.")
    parser.add_argument("--seed", type=int, default=1, help="A seed for model reproducibility.")
    parser.add_argument("--min_alpha", type=float, default=0.0001, help="Learning rate will linearly drop to min_alpha as training progresses.")
    parser.add_argument("--sg", type=int, default=0, help="Training algorithm: 1 for skip-gram; otherwise CBOW.")
    parser.add_argument("--hs", type=int, default=0, help="If 1, hierarchical softmax will be used for model training. If set to 0, and negative is non-zero, negative sampling will be used.")
    parser.add_argument("--negative", type=int, default=5, help="If > 0, negative sampling will be used, the int for negative specifies how many “noise words” should be drawn (usually between 5-20). If set to 0, no negative sampling is used.")
    parser.add_argument("--ns_exponent", type=float, default=0.75, help="The exponent used to shape the negative sampling distribution.")
    parser.add_argument("--cbow_mean", type=int, default=1, help="If 0, use the sum of the context word vectors. If 1, use the mean. Only applies when cbow is used.")
    # Boolean flags use an explicit converter; ``type=bool`` would turn "False" into True.
    parser.add_argument("--sorted_vocab", type=_str_to_bool, default=True, help="If 1, sort the vocabulary by descending frequency before assigning word indexes.")
    parser.add_argument("--batch_words", type=int, default=10000, help="Target size (in words) for batches of examples passed to worker threads (and thus cython routines). Larger batches will be passed if individual texts are longer than 10000 words, but the standard cython code truncates to that maximum.")
    parser.add_argument("--shrink_windows", type=_str_to_bool, default=True, help="If 1, the input sentence will be truncated to the window size.")
    parser.add_argument("--max_vocab_size", type=int, default=None, help="Limits the RAM during vocabulary building; if there are more unique words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. Set to None for no limit (default).")
    parser.add_argument("--n_decimals", type=int, default=2, help="Rounds peak position to this number of decimals.")
    parser.add_argument("--n_workers", type=int, default=1, help="Number of worker nodes to train the model.")

    # Output files
    parser.add_argument("--model_filename_pickle", type=str, help="If specified, the model will also be saved as a pickle file.")
    parser.add_argument("--model_filename", type=str, help="Path to the output model json-file.")
    parser.add_argument("--weights_filename", type=str, help="Path to the output weights json-file.")

    # Parse the arguments handed to main() rather than implicitly re-reading sys.argv.
    args = parser.parse_args(argv)

    # Load the spectra
    spectra = list(read_spectra(args.spectra_filename, args.spectra_fileformat))
    reference_documents = [SpectrumDocument(spectrum, n_decimals=args.n_decimals) for spectrum in spectra]

    # Process epoch arguments: an explicit checkpoint list takes precedence over --epochs.
    if args.checkpoints:
        iterations = parse_checkpoints_input(args.checkpoints)
    else:
        iterations = args.epochs

    # Train a model
    # NOTE(review): --min_alpha is forwarded as `learning_rate_decay`; confirm that
    # this keyword expects a final learning rate and not a per-epoch decrement.
    model = train_new_word2vec_model(
        documents=reference_documents,
        iterations=iterations,
        filename="spec2vec",
        progress_logger=True,
        workers=args.n_workers,
        vector_size=args.vector_size,
        learning_rate_initial=args.alpha,
        learning_rate_decay=args.min_alpha,
        window=args.window,
        min_count=args.min_count,
        sample=args.sample,
        seed=args.seed,
        sg=args.sg,
        hs=args.hs,
        negative=args.negative,
        ns_exponent=args.ns_exponent,
        cbow_mean=args.cbow_mean,
        sorted_vocab=args.sorted_vocab,
        batch_words=args.batch_words,
        shrink_windows=args.shrink_windows,
        max_vocab_size=args.max_vocab_size)

    # Save the model (the pickle output is optional, the JSON export always runs)
    if args.model_filename_pickle:
        model.save(args.model_filename_pickle)

    export_model(model, args.model_filename, args.weights_filename)
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
107
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
108
e1e22ada831e planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff changeset
if __name__ == "__main__":
    # Hand main() only the user-supplied arguments; sys.argv[0] is the script path.
    cli_arguments = sys.argv[1:]
    main(argv=cli_arguments)