Mercurial > repos > recetox > spec2vec_training
annotate spec2vec_training_wrapper.py @ 0:e1e22ada831e draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
author | recetox |
---|---|
date | Thu, 05 Jan 2023 10:08:12 +0000 |
parents | |
children |
rev | line source |
---|---|
0
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
2 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
3 import argparse |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
4 import sys |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
5 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
6 from matchms.importing import load_from_mgf, load_from_msp |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
7 from spec2vec import SpectrumDocument |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
8 from spec2vec.model_building import train_new_word2vec_model |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
9 from spec2vec.serialization import export_model |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
10 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
11 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
def read_spectra(spectra_file, file_format):
    """Load spectra from a file with the matchms importer for its format.

    Args:
        spectra_file: Path to the file containing the spectra.
        file_format: Format identifier; "mgf" and "msp" are supported.

    Returns:
        Whatever the matching matchms loader (``load_from_mgf`` or
        ``load_from_msp``) returns for ``spectra_file``.

    Raises:
        NotImplementedError: If ``file_format`` is neither "mgf" nor "msp".
    """
    # Reject unknown formats up front so the loaders below are the only
    # remaining possibilities.
    if file_format not in ("mgf", "msp"):
        raise NotImplementedError(f"Unsupported file format: {file_format}.")

    if file_format == "mgf":
        return load_from_mgf(spectra_file)
    return load_from_msp(spectra_file)
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
19 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
20 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
def parse_checkpoints_input(checkpoints_input):
    """Parse a comma-separated list of checkpoint epochs.

    All spaces are removed before splitting, so "5, 10,15" and "5,10,15"
    are equivalent. Duplicate values are collapsed.

    Args:
        checkpoints_input: String of comma-separated integers.

    Returns:
        A list of the unique checkpoint values in ascending order.

    Raises:
        ValueError: If any comma-separated item is not an integer
            (this includes empty items such as a trailing comma).
    """
    checkpoints_str = checkpoints_input.replace(" ", "").split(",")
    try:
        # Build the set eagerly: a bare ``map`` is lazy in Python 3, so the
        # conversion error would otherwise surface outside this ``try`` and
        # bypass the message below.
        checkpoints_int = {int(item) for item in checkpoints_str}
    except ValueError as err:
        raise ValueError("Checkpoint values must be integers.") from err
    # Sort so the result does not depend on set iteration order.
    return sorted(checkpoints_int)
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
28 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
29 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
def _parse_bool_flag(value):
    """Convert a command-line string into a boolean.

    ``type=bool`` in argparse calls ``bool()`` on the raw string, which makes
    every non-empty value (including "False" and "0") evaluate to ``True``.
    This helper interprets the usual textual spellings instead. The empty
    string maps to ``False``, as it did with ``bool()``.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognised boolean.
    """
    normalized = str(value).strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    if normalized in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def main(argv):
    """Train a spec2vec model from a spectra file and write it to disk.

    The spectra are loaded, converted to ``SpectrumDocument`` objects, used to
    train a new word2vec model, and the model is exported to the requested
    output files.

    Args:
        argv: Command-line arguments without the program name. Passing
            ``None`` makes argparse fall back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Train a spec2vec model.")

    # Input data
    parser.add_argument("--spectra_filename", type=str, help="Path to a file containing spectra.")
    parser.add_argument("--spectra_fileformat", type=str, help="Spectra file format.")

    # Training parameters
    parser.add_argument("--epochs", type=int, default=0, help="Number of epochs to train the model.")
    parser.add_argument("--checkpoints", type=str, default=None, help="Epochs after which to save the model.")

    # Hyperparameters
    parser.add_argument("--vector_size", type=int, default=100, help="Dimensionality of the feature vectors.")
    parser.add_argument("--alpha", type=float, default=0.025, help="The initial learning rate.")
    parser.add_argument("--window", type=int, default=5, help="The maximum distance between the current and predicted peak within a spectrum.")
    parser.add_argument("--min_count", type=int, default=5, help="Ignores all peaks with total frequency lower than this.")
    parser.add_argument("--sample", type=float, default=0.001, help="The threshold for configuring which higher-frequency peaks are randomly downsampled.")
    parser.add_argument("--seed", type=int, default=1, help="A seed for model reproducibility.")
    parser.add_argument("--min_alpha", type=float, default=0.0001, help="Learning rate will linearly drop to min_alpha as training progresses.")
    parser.add_argument("--sg", type=int, default=0, help="Training algorithm: 1 for skip-gram; otherwise CBOW.")
    parser.add_argument("--hs", type=int, default=0, help="If 1, hierarchical softmax will be used for model training. If set to 0, and negative is non-zero, negative sampling will be used.")
    parser.add_argument("--negative", type=int, default=5, help="If > 0, negative sampling will be used, the int for negative specifies how many “noise words” should be drawn (usually between 5-20). If set to 0, no negative sampling is used.")
    parser.add_argument("--ns_exponent", type=float, default=0.75, help="The exponent used to shape the negative sampling distribution.")
    parser.add_argument("--cbow_mean", type=int, default=1, help="If 0, use the sum of the context word vectors. If 1, use the mean. Only applies when cbow is used.")
    # Boolean flags go through _parse_bool_flag so that "False"/"0" are
    # honoured instead of being treated as truthy non-empty strings.
    parser.add_argument("--sorted_vocab", type=_parse_bool_flag, default=True, help="If 1, sort the vocabulary by descending frequency before assigning word indexes.")
    parser.add_argument("--batch_words", type=int, default=10000, help="Target size (in words) for batches of examples passed to worker threads (and thus cython routines). Larger batches will be passed if individual texts are longer than 10000 words, but the standard cython code truncates to that maximum.")
    parser.add_argument("--shrink_windows", type=_parse_bool_flag, default=True, help="If 1, the input sentence will be truncated to the window size.")
    parser.add_argument("--max_vocab_size", type=int, default=None, help="Limits the RAM during vocabulary building; if there are more unique words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. Set to None for no limit (default).")
    parser.add_argument("--n_decimals", type=int, default=2, help="Rounds peak position to this number of decimals.")
    parser.add_argument("--n_workers", type=int, default=1, help="Number of worker nodes to train the model.")

    # Output files
    parser.add_argument("--model_filename_pickle", type=str, help="If specified, the model will also be saved as a pickle file.")
    parser.add_argument("--model_filename", type=str, help="Path to the output model json-file.")
    parser.add_argument("--weights_filename", type=str, help="Path to the output weights json-file.")

    # Parse the arguments handed to this function rather than implicitly
    # reading sys.argv, so main() can be driven programmatically.
    args = parser.parse_args(argv)

    # Load the spectra
    spectra = list(read_spectra(args.spectra_filename, args.spectra_fileformat))
    reference_documents = [SpectrumDocument(spectrum, n_decimals=args.n_decimals) for spectrum in spectra]

    # Process epoch arguments: an explicit checkpoint list takes precedence
    # over the plain epoch count.
    if args.checkpoints:
        iterations = parse_checkpoints_input(args.checkpoints)
    else:
        iterations = args.epochs

    # Train a model
    # NOTE(review): --min_alpha is described as the final learning rate but is
    # forwarded as ``learning_rate_decay``; spec2vec appears to treat that
    # value as a per-epoch decrement -- confirm the intended mapping.
    model = train_new_word2vec_model(
        documents=reference_documents,
        iterations=iterations,
        filename="spec2vec",
        progress_logger=True,
        workers=args.n_workers,
        vector_size=args.vector_size,
        learning_rate_initial=args.alpha,
        learning_rate_decay=args.min_alpha,
        window=args.window,
        min_count=args.min_count,
        sample=args.sample,
        seed=args.seed,
        sg=args.sg,
        hs=args.hs,
        negative=args.negative,
        ns_exponent=args.ns_exponent,
        cbow_mean=args.cbow_mean,
        sorted_vocab=args.sorted_vocab,
        batch_words=args.batch_words,
        shrink_windows=args.shrink_windows,
        max_vocab_size=args.max_vocab_size)

    # Save the model
    if args.model_filename_pickle:
        model.save(args.model_filename_pickle)

    export_model(model, args.model_filename, args.weights_filename)
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
107 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
108 |
e1e22ada831e
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Pass on only the user-supplied arguments; the program name is dropped.
    cli_arguments = sys.argv[1:]
    main(argv=cli_arguments)