annotate vgx_converter.py @ 1:f009d54be92b draft

Uploaded
author computationaltranscriptomics
date Tue, 23 Feb 2016 09:03:17 -0500
parents fb26ae916886
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
1 #!/usr/bin/python
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
2
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
3 import sys #
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
4 import csv
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
5 import json
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
6 import shlex
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
7
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
8
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
def _node_id(name, ids, nodes):
    """Return the numeric id of *name*, registering a new node on first sight.

    Ids are handed out in order of first appearance, so a node's id is also
    its index in *nodes*.
    """
    if name not in ids:
        ids[name] = len(nodes)
        nodes.append({'id': ids[name], 'name': name})
    return ids[name]


def _read_network(path, delimiter):
    """Parse the network file at *path* into ``(ids, nodes, links)``.

    Column 0 of each row is the source node and column 2 the target node;
    column 1 is not used. Blank lines are skipped.
    """
    ids = {}    # node name -> numeric id
    nodes = []
    links = []
    with open(path, 'r') as handle:
        for row in csv.reader(handle, delimiter=delimiter):
            if not row:
                continue  # csv.reader yields [] for blank lines
            # Only the first shell-style token of each field is the name.
            source_name = shlex.split(row[0])[0]
            target_name = shlex.split(row[2])[0]
            source = _node_id(source_name, ids, nodes)
            target = _node_id(target_name, ids, nodes)
            links.append({'source': source, 'target': target})
    return ids, nodes, links


def _apply_attributes(path, ids, nodes):
    """Attach the tab-separated attribute columns in *path* to known nodes.

    Column 0 names the node; each following column is stored on that node as
    ``property0``, ``property1``, ... Rows naming unknown nodes and blank
    lines are ignored.
    """
    with open(path) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            # Membership is tested on the dict itself (constant time).
            if not row or row[0] not in ids:
                continue
            node = nodes[ids[row[0]]]
            for index, value in enumerate(row[1:]):
                node['property' + str(index)] = value


def __main__():
    """Convert a delimited network file into a nodes/links JSON file.

    Command line: ``<network> <tab|space> <attributes|None> <output>``.
    Passing the literal string ``None`` as the attributes argument skips
    attribute processing. Exits with a usage message when arguments are
    missing; an unknown delimiter name raises ``KeyError``.
    """
    arg_names = ['command', "network", "delimiter", "attributes", "output"]
    if len(sys.argv) < len(arg_names):
        sys.exit('usage: vgx_converter.py <network> <tab|space> '
                 '<attributes|None> <output>')
    args = dict(zip(arg_names, sys.argv))
    print(args)

    delim = {'tab': '\t', 'space': ' '}

    ids, nodes, links = _read_network(args['network'],
                                      delim[args['delimiter']])
    data = {'nodes': nodes, 'links': links}

    if args['attributes'] != 'None':  # None for Galaxy
        # NOTE(review): looks like leftover debug output; kept so the
        # emitted JSON is unchanged - confirm whether consumers read it.
        data['some'] = 'not in here'
        _apply_attributes(args['attributes'], ids, nodes)

    # write json data back to the specified output file
    with open(args['output'], 'w') as out:
        json.dump(data, out)
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
69
fb26ae916886 Uploaded
computationaltranscriptomics
parents:
diff changeset
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()