Mercurial > repos > shellac > sam_consensus_v3
annotate env/bin/dynamodb_load @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac | 
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
0
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
1 #!/Users/cmdms/OneDrive-UOB/Development/Projects/2021/sam-consensus-v3/env/bin/python3 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
2 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
3 import argparse | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
4 import os | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
5 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
6 import boto | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
7 from boto.compat import json | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
8 from boto.compat import six | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
9 from boto.dynamodb.schema import Schema | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
10 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
11 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
12 DESCRIPTION = """Load data into one or more DynamoDB tables. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
13 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
14 For each table, data is read from two files: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
15 - {table_name}.metadata for the table's name, schema and provisioned | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
16 throughput (only required if creating the table). | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
17 - {table_name}.data for the table's actual contents. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
18 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
19 Both files are searched for in the current directory. To read them from | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
20 somewhere else, use the --in-dir parameter. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
21 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
22 This program does not wipe the tables prior to loading data. However, any | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
23 items present in the data files will overwrite the table's contents. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
24 """ | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
25 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
26 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
27 def _json_iterload(fd): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
28 """Lazily load newline-separated JSON objects from a file-like object.""" | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
29 buffer = "" | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
30 eof = False | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
31 while not eof: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
32 try: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
33 # Add a line to the buffer | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
34 buffer += fd.next() | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
35 except StopIteration: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
36 # We can't let that exception bubble up, otherwise the last | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
37 # object in the file will never be decoded. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
38 eof = True | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
39 try: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
40 # Try to decode a JSON object. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
41 json_object = json.loads(buffer.strip()) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
42 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
43 # Success: clear the buffer (everything was decoded). | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
44 buffer = "" | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
45 except ValueError: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
46 if eof and buffer.strip(): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
47 # No more lines to load and the buffer contains something other | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
48 # than whitespace: the file is, in fact, malformed. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
49 raise | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
50 # We couldn't decode a complete JSON object: load more lines. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
51 continue | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
52 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
53 yield json_object | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
54 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
55 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
56 def create_table(metadata_fd): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
57 """Create a table from a metadata file-like object.""" | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
58 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
59 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
60 def load_table(table, in_fd): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
61 """Load items into a table from a file-like object.""" | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
62 for i in _json_iterload(in_fd): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
63 # Convert lists back to sets. | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
64 data = {} | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
65 for k, v in six.iteritems(i): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
66 if isinstance(v, list): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
67 data[k] = set(v) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
68 else: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
69 data[k] = v | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
70 table.new_item(attrs=data).put() | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
71 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
72 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
73 def dynamodb_load(tables, in_dir, create_tables): | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
74 conn = boto.connect_dynamodb() | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
75 for t in tables: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
76 metadata_file = os.path.join(in_dir, "%s.metadata" % t) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
77 data_file = os.path.join(in_dir, "%s.data" % t) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
78 if create_tables: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
79 with open(metadata_file) as meta_fd: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
80 metadata = json.load(meta_fd) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
81 table = conn.create_table( | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
82 name=t, | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
83 schema=Schema(metadata["schema"]), | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
84 read_units=metadata["read_units"], | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
85 write_units=metadata["write_units"], | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
86 ) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
87 table.refresh(wait_for_active=True) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
88 else: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
89 table = conn.get_table(t) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
90 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
91 with open(data_file) as in_fd: | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
92 load_table(table, in_fd) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
93 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
94 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
95 if __name__ == "__main__": | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
96 parser = argparse.ArgumentParser( | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
97 prog="dynamodb_load", | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
98 description=DESCRIPTION | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
99 ) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
100 parser.add_argument( | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
101 "--create-tables", | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
102 action="store_true", | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
103 help="Create the tables if they don't exist already (without this flag, attempts to load data into non-existing tables fail)." | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
104 ) | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
105 parser.add_argument("--in-dir", default=".") | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
106 parser.add_argument("tables", metavar="TABLES", nargs="+") | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
107 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
108 namespace = parser.parse_args() | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
109 | 
| 
 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 
shellac 
parents:  
diff
changeset
 | 
110 dynamodb_load(namespace.tables, namespace.in_dir, namespace.create_tables) | 
