annotate vgx_converter.py @ 15:cb4dce1042ef draft

Changed VGX to JGF
author computationaltranscriptomics
date Thu, 02 Jun 2016 09:32:40 -0400
parents 0726953d86f3
children 567274fa974d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
1 #!/usr/bin/python
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
2
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
3 import sys #
14
0726953d86f3 Uploaded
computationaltranscriptomics
parents: 9
diff changeset
4 import csv #
8
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
5 import json
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
6 import shlex
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
7
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
8
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
def _read_network(path, delimiter):
    """Parse the network file into node and edge lists.

    Each non-empty row is expected to hold the first endpoint in column 0
    and the second endpoint in column 2 (column 1 is not read).  Only the
    first shell-style token of each endpoint field is used as the node name.

    Returns a tuple ``(ids, nodes, links)`` where ``ids`` maps a node name
    to its integer index in ``nodes``.
    """
    ids = {}
    nodes = []
    links = []
    with open(path, 'r') as handle:
        for row in csv.reader(handle, delimiter=delimiter):
            if not row:
                # csv.reader yields [] for blank lines; nothing to convert.
                continue
            endpoints = (shlex.split(row[0])[0], shlex.split(row[2])[0])
            for name in endpoints:
                if name not in ids:
                    # The running id is the node's position in `nodes`.
                    ids[name] = len(nodes)
                    nodes.append({'id': str(ids[name]), 'label': name})
            # NOTE(review): node ids are written as strings while edge
            # endpoints are written as integers; kept unchanged so existing
            # consumers of the output are not affected -- confirm whether
            # the two should match.
            links.append({'source': ids[endpoints[0]],
                          'target': ids[endpoints[1]]})
    return ids, nodes, links


def _attach_metadata(path, ids, nodes):
    """Add a 'metadata' dict to every node listed in the attributes file.

    The file is tab-separated: column 0 is the node name, and every
    following column i is stored under the key 'property<i-1>'.  Rows whose
    name is not a known node (and blank rows) are ignored.
    """
    with open(path) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if not row or row[0] not in ids:
                continue
            metadata = {}
            for index, value in enumerate(row[1:]):
                metadata['property' + str(index)] = value
            print(metadata)
            nodes[ids[row[0]]]['metadata'] = metadata


def __main__():
    """Convert a delimited network file (plus optional attributes) to JSON.

    Command line: <network> <delimiter: tab|space> <attributes|None> <output>
    The literal string 'None' for the attributes argument means that no
    attributes file is supplied (this is what Galaxy passes).
    """
    arg_names = ['command', 'network', 'delimiter', 'attributes', 'output']
    args = dict(zip(arg_names, sys.argv))
    print(args)

    if len(args) < len(arg_names):
        sys.exit('usage: %s <network> <tab|space> <attributes|None> <output>'
                 % args.get('command', 'vgx_converter.py'))

    delim = {'tab': '\t', 'space': ' '}
    ids, nodes, links = _read_network(args['network'],
                                      delim[args['delimiter']])

    data = {'graph': {'nodes': nodes, 'edges': links}}

    if args['attributes'] != 'None':  # None for Galaxy
        data['some'] = 'not in here'
        _attach_metadata(args['attributes'], ids, nodes)

    # write json data back to the specified output file
    with open(args['output'], 'w') as out:
        json.dump(data, out)


if __name__ == "__main__":
    __main__()