Mercurial > repos > cpt > cpt_search_file
annotate editDB.py @ 2:21ae0e340d80 draft default tip
planemo upload commit f33bdf952d796c5d7a240b132af3c4cbd102decc
author | cpt |
---|---|
date | Fri, 05 Jan 2024 05:57:22 +0000 |
parents | 6e3a843b6304 |
children |
rev | line source |
---|---|
1
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1 # Remove duplicate terms, and add any that are needed. |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
2 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
3 import explodeJSON as ej |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
4 from explodeJSON import save_dict_to_json |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
5 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
6 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
7 ### create new key |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
def add_new_key(db, add_key=None):
    """Make sure every key listed in ``add_key`` exists in ``db``.

    Args:
        db: Dictionary to update. It is modified in place.
        add_key: Iterable of keys to create. ``None`` (the default) adds
            nothing.

    Returns:
        The same ``db`` object that was passed in.

    A missing key is created with its own new empty list. A key that is
    already present keeps its current value, so calling this on a populated
    database cannot erase existing terms.
    """
    # None instead of a literal [] default: a default list object is created
    # once and shared by every call.
    if add_key is None:
        return db

    for new_key in add_key:
        # setdefault only inserts when the key is absent.
        db.setdefault(new_key, [])

    return db
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
14 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
15 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
16 ### Add values to dbase: |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
def add_value_to_term(index_val, db, add_value=None):
    """Append values to the list stored at ``db[index_val]``.

    Args:
        index_val: Key in ``db`` whose list receives the new values.
        db: Dictionary of lists. It is modified in place.
        add_value: Iterable of values to append, in order. ``None`` (the
            default) appends nothing.

    Returns:
        The same ``db`` object that was passed in.

    Raises:
        KeyError: If there is at least one value to add and ``index_val`` is
            not a key of ``db`` (for a plain ``dict``).
    """
    # None instead of a literal [] default: a default list object is created
    # once and shared by every call.
    if add_value is None:
        return db

    # Snapshot first, so passing db[index_val] itself as add_value cannot grow
    # the list while it is being read.
    new_values = list(add_value)
    if new_values:
        db[index_val].extend(new_values)

    return db
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
23 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
24 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
25 ### Remove values from dbase: |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
def remove_value_from_term(index_val, db, remove_value=None):
    """Remove values from the list stored at ``db[index_val]``.

    Args:
        index_val: Key in ``db`` whose list is edited.
        db: Dictionary of lists. It is modified in place.
        remove_value: Iterable of values to remove. Each entry removes one
            occurrence (the first) of that value. ``None`` (the default)
            removes nothing.

    Returns:
        The same ``db`` object that was passed in.

    Raises:
        ValueError: If a value to remove is not in the list. Values that come
            earlier in ``remove_value`` have already been removed by then.
        KeyError: If there is at least one value to remove and ``index_val``
            is not a key of ``db`` (for a plain ``dict``).
    """
    # None instead of a literal [] default: a default list object is created
    # once and shared by every call.
    if remove_value is None:
        return db

    # Iterate over a snapshot: if remove_value is db[index_val] itself,
    # removing while iterating the live list would skip every other entry.
    for val in list(remove_value):
        db[index_val].remove(val)

    return db
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
32 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
33 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
34 ### Terms to add from a file |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
def add_from_file(input_file, index_val, db, sep="\n"):
    """Read terms from a text file and append them to ``db[index_val]``.

    Args:
        input_file: Path of the text file holding the terms.
        index_val: Key in ``db`` whose list receives the terms.
        db: Dictionary of lists. It is modified in place.
        sep: Separator between terms. With the default ``"\\n"`` the file is
            split into lines with ``str.splitlines``. Any other separator
            splits the text on that string, strips surrounding whitespace
            from each piece and drops empty pieces.

    Returns:
        The same ``db`` object that was passed in.

    Raises:
        OSError: If ``input_file`` cannot be opened or read.
        KeyError: If the file yields at least one term and ``index_val`` is
            not a key of ``db`` (for a plain ``dict``).
    """
    # The context manager closes the handle even when reading fails.
    with open(input_file) as handle:
        text = handle.read()

    if sep == "\n":
        # Default path: one term per line, blank lines kept as empty terms.
        terms = text.splitlines()
    else:
        # Custom separator: trim the whitespace/newlines left around each
        # piece and skip pieces that end up empty (e.g. after a trailing sep).
        pieces = (piece.strip() for piece in text.split(sep))
        terms = [piece for piece in pieces if piece]

    # Only touch db[index_val] when there is something to add, so an empty
    # file does not require the key to exist.
    if terms:
        db[index_val].extend(terms)
    return db
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
40 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
41 |
6e3a843b6304
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
if __name__ == "__main__":

    # Source database to revise; point this at the JSON of choice.
    source_json = "data/lysis-family-v1.0.2.json"
    # Load it through the explodeJSON helper (presumably yields a dict of
    # term lists -- confirm against explodeJSON.readJSON).
    term_db = ej.explodeJSON(source_json).readJSON()

    # Earlier revisions, kept for reference:
    # updated_db = add_new_key(db=term_db,add_key=["spanins"])
    # files = ["data/term_additions/200505_holin_domains.txt","data/term_additions/200505_Spanin_Domains.txt"]
    # updated_db = add_from_file(files[0],"holin_domains",updated_db)
    # updated_db = add_from_file(files[1],"spanin_domains",updated_db)

    # Identifiers appended to the "spanin_domains" entry in this revision.
    spanin_terms = [
        "DUF2570",
        "PF10828",
        "IPR022538",
        "DUF2514",
        "PF10721",
        "IPR019659",
        "DUF2681",
        "PF10883",
        "IPR020274",
    ]

    updated_db = add_value_to_term(
        "spanin_domains", term_db, add_value=spanin_terms
    )
    # Write the revised database under the next version number.
    save_dict_to_json(obj=updated_db, filename="data/lysis-family-v1.0.3.json")