comparison PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py @ 5:0c5867456e28 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit ac4353ca5c0ac9ce60df9f4bf160ed08b99fbee3"
author jay
date Thu, 28 Jan 2021 04:10:34 +0000
parents fbd01647d7e6
children
comparison
equal deleted inserted replaced
4:ee0f96a30d36 5:0c5867456e28
9 9
10 parser.add_argument("-I", "--Input", required=True, default=None, help="Path to target fasta file") 10 parser.add_argument("-I", "--Input", required=True, default=None, help="Path to target fasta file")
11 parser.add_argument("-M", "--min_count", required=False, default=0, help="Path to target tsv file") 11 parser.add_argument("-M", "--min_count", required=False, default=0, help="Path to target tsv file")
12 parser.add_argument("-W", "--window", required=False, default=5, help="Path to target tsv file") 12 parser.add_argument("-W", "--window", required=False, default=5, help="Path to target tsv file")
13 parser.add_argument("-O", "--OutFile", required=False, default='model.txt', help="Path to target tsv file") 13 parser.add_argument("-O", "--OutFile", required=False, default='model.txt', help="Path to target tsv file")
14 parser.add_argument("-S", "--SG", required=False, default='skip-gram', help="Training algorithm: 1 for skip-gram; otherwise CBOW")
14 15
15 args = parser.parse_args() 16 args = parser.parse_args()
16 17
17 class ProteinSeq(object): 18 class ProteinSeq(object):
18 def __init__(self): 19 def __init__(self):
28 Ngram_list.append(tri_pep) 29 Ngram_list.append(tri_pep)
29 yield Ngram_list 30 yield Ngram_list
30 #min_count = 0 31 #min_count = 0
31 size = 200 32 size = 200
32 #window = 5 33 #window = 5
33 sg = 1 34
35 print (args.SG)
36 if args.SG == 'skip-gram':
37 SG = 1
38 elif args.SG == 'CBOW':
39 SG = 0
34 40
35 sentences = ProteinSeq() 41 sentences = ProteinSeq()
36 model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = sg, workers = 10) 42 model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = SG, workers = 10)
37 model.wv.save_word2vec_format(args.OutFile, binary=False) 43 model.wv.save_word2vec_format(args.OutFile, binary=False)
38 44