Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/ReMatCh/modules/checkMLST.py @ 1:444b0421bbdc draft
"planemo upload commit c8533b93fb2816db37887244489cbd6e919fc155"
| author | iss | 
|---|---|
| date | Tue, 22 Mar 2022 08:39:10 +0000 | 
| parents | c6bab5103a14 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
1 import sys | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
2 import os | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
3 import urllib.request | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
4 import csv | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
5 from glob import glob | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
6 import re | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
7 import functools | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
8 try: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
9 import xml.etree.cElementTree as ET | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
10 except ImportError: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
11 import xml.etree.ElementTree as ET | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
12 from . import utils | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
13 from . import rematch_module | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
14 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
15 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def determine_species(species):
    """Normalise a species name into its lowercase genus/species words.

    The name is lowercased and split into words, and only the first two
    words are kept. When the second word is a placeholder ('spp', 'spp.'
    or 'complex') only the first word (the genus) is returned.

    Words are split on any run of whitespace rather than on single spaces,
    so stray leading, trailing or doubled spaces no longer produce empty
    words that make two spellings of the same species compare unequal.

    Parameters
    ----------
    species : str
        Species name, e.g. 'Escherichia coli' or 'Streptococcus spp.'.

    Returns
    -------
    list of str
        One or two lowercase words; [''] when the name is blank.
    """
    words = species.lower().split()
    if not words:
        # Blank input: keep returning a single empty word, as before.
        return ['']

    words = words[:2]
    if len(words) == 2 and words[1] in ('spp', 'spp.', 'complex'):
        return words[:1]
    return words
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
25 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
26 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def check_existing_schema(species, schema_number, script_path):
    """Find a bundled MLST schema FASTA file for a species.

    The folder '<dirname(script_path)>/modules/mlst_schemas/' is searched
    for visible regular files whose name starts with the normalised species
    words joined by '_' (followed by '#<schema_number>' when a number is
    given) and ends with '.fasta'.

    Every matching file is collected before choosing one. Previously each
    match overwrote the result with a path string, so the schema-number
    disambiguation never ran and the outcome depended on directory listing
    order.

    Parameters
    ----------
    species : str
        Species name; normalised with determine_species().
    schema_number : int or str or None
        Wanted schema number, or None when the caller has no preference.
    script_path : str
        Path of the running script; the schemas folder is resolved relative
        to its directory.

    Returns
    -------
    str or None
        Path of the chosen schema file, or None when nothing matches. With
        several matches, the file whose name (without extension) ends with
        '#<schema_number>' is chosen ('#1' when no number was given);
        failing that, the first match in sorted order.
    """
    species_prefix = '_'.join(determine_species(species))
    schema_suffix = '' if schema_number is None else '#' + str(schema_number)

    mlst_schemas_folder = os.path.join(os.path.dirname(script_path), 'modules', 'mlst_schemas', '')

    # Sorted so the outcome does not depend on the order os.listdir() happens to use.
    candidates = []
    for file_found in sorted(os.listdir(mlst_schemas_folder)):
        if file_found.startswith('.') or not file_found.endswith('.fasta'):
            continue
        if not file_found.startswith(species_prefix + schema_suffix):
            continue
        file_path = os.path.join(mlst_schemas_folder, file_found)
        if os.path.isfile(file_path):
            candidates.append(file_path)

    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # Several files share the prefix (e.g. '#1' also prefixes '#10'):
    # pick the one whose stem ends with the wanted schema number.
    if schema_suffix == '':
        schema_suffix = '#1'
    for scheme in candidates:
        if os.path.splitext(scheme)[0].endswith(schema_suffix):
            return scheme

    # Still ambiguous: return a single deterministic path rather than a list,
    # so the return type stays a path string.
    return candidates[0]
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
55 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
56 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def write_mlst_reference(species, mlst_sequences, outdir, time_str):
    """Write the MLST allele sequences to a FASTA reference file.

    The file is created in *outdir* and named '<species>.<time_str>.fasta',
    with every '/' and ' ' in *species* replaced by '_'.

    Parameters
    ----------
    species : str
        Species name used to build the file name.
    mlst_sequences : dict
        Maps each FASTA header to its sequence.
    outdir : str
        Directory in which the file is written.
    time_str : str
        Text inserted between the species name and the '.fasta' extension.

    Returns
    -------
    str
        Path of the written FASTA file.
    """
    print('Writing MLST alleles as reference_sequences' + '\n')

    safe_name = species.replace('/', '_').replace(' ', '_')
    reference_file = os.path.join(outdir, safe_name + '.' + time_str + '.fasta')

    with open(reference_file, 'wt') as fasta:
        for locus in mlst_sequences:
            fasta.write('>' + locus + '\n')
            # NOTE(review): chunkstring presumably yields pieces of at most
            # 80 characters - confirm in rematch_module.
            for chunk in rematch_module.chunkstring(mlst_sequences[locus], 80):
                fasta.write(chunk + '\n')

    return reference_file
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
67 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
68 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def get_st(mlst_dicts, dict_sequences):
    """Determine the allelic profile and sequence type (ST) of a sample.

    Parameters
    ----------
    mlst_dicts : sequence
        Indexed as [0] locus -> {allele sequence: allele number},
        [1] comma-joined allelic profile -> ST, and [2] the list of loci in
        profile order.
    dict_sequences : dict
        Values are dicts with a 'header' (locus name) and a 'sequence'
        entry; the keys are not used.

    Returns
    -------
    tuple of (str, str)
        The ST ('-' when the profile is not in the ST table) and the
        comma-joined allelic profile, with '-' for every locus that got no
        allele number.
    """
    allele_table = mlst_dicts[0]
    st_table = mlst_dicts[1]
    loci_order = mlst_dicts[2]

    alleles_profile = ['-'] * len(loci_order)
    for sequence_data in dict_sequences.values():
        locus = sequence_data['header']
        if locus not in allele_table:
            print(locus + ' not found between consensus sequences!')
            # NOTE(review): this stops at the first unknown locus, leaving
            # any remaining loci as '-'; kept as is to preserve the results.
            break

        sequence = sequence_data['sequence']
        known_alleles = allele_table[locus]

        # Exact match: one hash lookup instead of scanning a copy of the keys.
        if sequence in known_alleles:
            alleles_profile[loci_order.index(locus)] = known_alleles[sequence]
            continue

        # Otherwise accept an allele contained in the sample sequence; when
        # several alleles match, the last one iterated wins.
        for allele_sequence, allele_number in known_alleles.items():
            if allele_sequence in sequence:
                alleles_profile[loci_order.index(locus)] = allele_number

    alleles_profile = ','.join(alleles_profile)
    st = st_table.get(alleles_profile, '-')

    return st, alleles_profile
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
93 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
94 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
# utils.timer with its 'name' argument pre-bound to the label
# 'Download PubMLST module'; applied below as the @downloadPubMLST decorator.
# NOTE(review): presumably utils.timer reports the wrapped call's run time
# under that label - confirm in utils.
downloadPubMLST = functools.partial(utils.timer, name='Download PubMLST module')
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
96 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
97 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
98 @downloadPubMLST | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
99 def download_pub_mlst_xml(originalSpecies, schema_number, outdir): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
100 print('Searching MLST database for ' + originalSpecies) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
101 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
102 xmlURL = 'http://pubmlst.org/data/dbases.xml' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
103 try: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
104 content = urllib.request.urlopen(xmlURL) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
105 xml = content.read() | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
106 tree = ET.fromstring(xml) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
107 except: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
108 print("Ooops! There might be a problem with the PubMLST service, try later or check if the xml is well formated" | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
109 " at " + xmlURL) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
110 raise | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
111 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
112 xmlData = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
113 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
114 if schema_number is None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
115 schema_number = 1 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
116 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
117 success = 0 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
118 for scheme in tree.findall('species'): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
119 species_scheme = scheme.text.rstrip('\r\n').rsplit('#', 1) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
120 number_scheme = species_scheme[1] if len(species_scheme) == 2 else 1 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
121 species_scheme = species_scheme[0] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
122 if determine_species(species_scheme) == determine_species(originalSpecies): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
123 if schema_number == number_scheme: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
124 success += 1 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
125 xmlData[scheme.text.strip()] = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
126 for info in scheme: # mlst | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
127 for database in info: # database | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
128 for retrievedDate in database.findall('retrieved'): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
129 retrieved = retrievedDate.text | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
130 xmlData[scheme.text.strip()][retrieved] = [] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
131 for profile in database.findall('profiles'): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
132 profileURl = profile.find('url').text | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
133 xmlData[scheme.text.strip()][retrieved].append(profileURl) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
134 for lociScheme in database.findall('loci'): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
135 loci = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
136 for locus in lociScheme: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
137 locusID = locus.text | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
138 for locusInfo in locus: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
139 locusUrl = locusInfo.text | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
140 loci[locusID.strip()] = locusUrl | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
141 xmlData[scheme.text.strip()][retrieved].append(loci) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
142 if success == 0: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
143 sys.exit("\tError. No schema found for %s. Please refer to https://pubmlst.org/databases/" % (originalSpecies)) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
144 elif success > 1: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
145 keys = list(xmlData.keys()) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
146 keys = sorted(keys) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
147 print("\tWarning. More than one schema found for %s. only keeping the first" | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
148 " one... %s" % (originalSpecies, keys[0])) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
149 for key in keys[1:]: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
150 del xmlData[key] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
151 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
152 pubmlst_dir = os.path.join(outdir, 'pubmlst', '') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
153 if not os.path.isdir(pubmlst_dir): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
154 os.makedirs(pubmlst_dir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
155 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
156 for SchemaName, info in list(xmlData.items()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
157 STdict = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
158 SequenceDict = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
159 mlst_sequences = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
160 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
161 species_name = '_'.join(determine_species(SchemaName)).replace('/', '_') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
162 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
163 for RetrievalDate, URL in list(info.items()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
164 schema_date = species_name + '_' + RetrievalDate | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
165 outDit = os.path.join(pubmlst_dir, schema_date) # compatible with windows? See if it already exists, if so, break | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
166 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
167 if os.path.isdir(outDit): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
168 pickle = os.path.join(outDit, str(schema_date + '.pkl')) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
169 if os.path.isfile(pickle): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
170 print("\tschema files already exist for %s" % (SchemaName)) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
171 mlst_dicts = utils.extract_variable_from_pickle(pickle) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
172 SequenceDict = mlst_dicts[0] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
173 for lociName, alleleSequences in list(SequenceDict.items()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
174 for sequence in alleleSequences: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
175 if lociName not in list(mlst_sequences.keys()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
176 mlst_sequences[lociName] = sequence | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
177 else: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
178 break | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
179 return mlst_dicts, mlst_sequences | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
180 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
181 elif any(species_name in x for x in os.listdir(pubmlst_dir)): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
182 print("Older version of %s's scheme found! Deleting..." % (SchemaName)) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
183 for directory in glob(str(pubmlst_dir + str(species_name + '_*'))): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
184 utils.remove_directory(directory) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
185 os.makedirs(outDit) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
186 else: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
187 os.makedirs(outDit) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
188 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
189 contentProfile = urllib.request.urlopen(URL[0]) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
190 header = next(contentProfile).decode("utf8").strip().split('\t') # skip header | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
191 try: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
192 indexCC = header.index('clonal_complex') if 'clonal_complex' in header else header.index('CC') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
193 except: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
194 indexCC = len(header) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
195 lociOrder = header[1:indexCC] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
196 for row in contentProfile: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
197 row = row.decode("utf8").strip().split('\t') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
198 ST = row[0] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
199 alleles = ','.join(row[1:indexCC]) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
200 STdict[alleles] = ST | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
201 for lociName, lociURL in list(URL[1].items()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
202 if lociName not in list(SequenceDict.keys()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
203 SequenceDict[lociName] = {} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
204 url_file = os.path.join(outDit, lociURL.rsplit('/', 1)[1]) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
205 urllib.request.urlretrieve(lociURL, url_file) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
206 sequences, ignore, ignore = rematch_module.get_sequence_information(url_file, 0) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
207 for key in list(sequences.keys()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
208 header = re.sub("\D", "", sequences[key]['header']) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
209 sequence = sequences[key]['sequence'].upper() | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
210 SequenceDict[lociName][sequence] = header | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
211 if lociName not in list(mlst_sequences.keys()): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
212 mlst_sequences[lociName] = sequence | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
213 os.remove(url_file) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
214 mlst_dicts = [SequenceDict, STdict, lociOrder] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
215 utils.save_variable_to_pickle(mlst_dicts, outDit, schema_date) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
216 return mlst_dicts, mlst_sequences | 
