annotate vgx_converter.py @ 17:96c070110fff draft default tip

Uploaded
author computationaltranscriptomics
date Sat, 04 Jun 2016 03:39:43 -0400
parents 567274fa974d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
1 #!/usr/bin/python
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
2
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
3 import sys #
14
0726953d86f3 Uploaded
computationaltranscriptomics
parents: 9
diff changeset
4 import csv #
8
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
5 import json
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
6 import shlex
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
7
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
def _read_network(path, separator):
    """Parse the edge list at *path* into id, node and edge collections.

    Each non-blank row describes one edge: column 0 holds the source node
    and column 2 the target node (column 1 is not read).  Only the first
    shell-style token of each of those two fields is used as the label.

    Returns a tuple ``(ids, nodes, links)`` where ``ids`` maps a label to
    its integer id, ``nodes`` is a list of ``{'id', 'label'}`` dicts and
    ``links`` is a list of ``{'source', 'target'}`` dicts.

    Raises IndexError when a non-blank row has fewer than three columns
    or an empty source/target field.
    """
    ids = {}
    nodes = []
    links = []

    def register(label):
        # Ids are handed out in order of first appearance, so a node's id
        # is also its index in ``nodes``.
        if label not in ids:
            ids[label] = len(nodes)
            nodes.append({'id': str(ids[label]), 'label': label})
        return ids[label]

    with open(path, 'r') as handle:
        for row in csv.reader(handle, delimiter=separator):
            if not row:
                # csv.reader yields [] for blank lines; nothing to convert.
                continue
            source = shlex.split(row[0])[0]
            target = shlex.split(row[2])[0]
            source_id = register(source)
            target_id = register(target)
            # NOTE(review): node ids are emitted as strings but edge
            # endpoints as integers; kept as-is because consumers may
            # rely on it -- confirm against the viewer.
            links.append({'source': source_id, 'target': target_id})

    return ids, nodes, links


def _attach_attributes(path, ids, nodes):
    """Add a ``metadata`` dict to every node listed in the file at *path*.

    The file is tab-separated: column 0 is a node label, every further
    column becomes ``property0``, ``property1``, ... in that node's
    metadata.  Rows whose label is not a key of *ids* are ignored.
    """
    with open(path) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if not row or row[0] not in ids:
                continue
            metadata = {
                'property' + str(index): value
                for index, value in enumerate(row[1:])
            }
            print(metadata)
            # The id doubles as the position of the node in ``nodes``.
            nodes[ids[row[0]]]['metadata'] = metadata


def __main__():
    """Convert an edge-list network file into a JSON graph file.

    Command line: ``<network> <delimiter> <attributes> <output>``.

    * network    -- path of the delimited edge list to read.
    * delimiter  -- ``tab`` or ``space``.
    * attributes -- path of a tab-separated node attribute file, or the
      literal string ``None`` (the source notes this is what Galaxy
      passes) to skip attributes.
    * output     -- path the JSON document is written to.

    The document written is ``{"graph": {"nodes": [...], "edges": [...]}}``.
    Exits with a message on stderr when arguments are missing or the
    delimiter name is unknown.
    """
    arg_names = ['command', 'network', 'delimiter', 'attributes', 'output']
    if len(sys.argv) < len(arg_names):
        sys.exit('usage: vgx_converter.py <network> <tab|space> '
                 '<attributes|None> <output>')
    args = dict(zip(arg_names, sys.argv))
    print(args)

    delimiters = {'tab': '\t', 'space': ' '}
    try:
        separator = delimiters[args['delimiter']]
    except KeyError:
        sys.exit('unknown delimiter %r (expected one of: %s)'
                 % (args['delimiter'], ', '.join(sorted(delimiters))))

    ids, nodes, links = _read_network(args['network'], separator)
    data = {'graph': {'nodes': nodes, 'edges': links}}

    if args['attributes'] != 'None':  # None for Galaxy
        _attach_attributes(args['attributes'], ids, nodes)

    # write json data back to the specified output file
    with open(args['output'], 'w') as out:
        json.dump(data, out)
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
73
1ae351222569 Uploaded
computationaltranscriptomics
parents:
diff changeset
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    __main__()