annotate tools/data_source/microbial_import.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Script that imports locally stored data as a new dataset for the user
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 Usage: import id outputfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 import sys, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 from shutil import copyfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
assert sys.version_info[:2] >= ( 2, 4 )

BUFFER = 1048576

# Number of tab-separated fields a usable DATA line must have:
# info_type, uid, org_num, chr_acc, feature, filetype, path.
_DATA_FIELD_COUNT = 7


def _parse_uids(uid_arg):
    """Split the comma-separated uid argument, dropping every 'None' entry."""
    return [uid for uid in uid_arg.split(",") if uid != 'None']


def _parse_data_line(line):
    """Parse one line of the configuration file.

    Returns a ``(uid, record)`` pair for a well-formed DATA line, where
    ``record`` is ``(description, path, build, file_type, chr_acc)``, or
    ``None`` for comments, non-DATA lines and DATA lines with too few fields.
    """
    if not line or line.startswith("#"):
        return None
    fields = line.split('\t')
    if fields[0].upper() != "DATA" or len(fields) < _DATA_FIELD_COUNT:
        return None
    # The fifth column ("feature") is present in the file but not used here.
    uid, org_num, chr_acc, _feature, filetype, path = fields[1:_DATA_FIELD_COUNT]
    # The path is usually the last column, so it may carry the line ending.
    path = path.replace("\r", "").replace("\n", "")
    # The uid doubles as the description and org_num as the build.
    return uid, (uid, path, org_num, filetype, chr_acc)


def _load_available_files(filename):
    """Build a dict keyed by uid of (description, path, build, file_type, chr_acc).

    If the file cannot be opened or read, a message is written to stderr and
    whatever was parsed so far (possibly nothing) is returned.
    """
    available_files = {}
    try:
        handle = open(filename)
        # Nested try/finally (rather than "with") keeps Python 2.4 support
        # while still guaranteeing the handle is closed.
        try:
            for line in handle:
                parsed = _parse_data_line(line)
                if parsed is not None:
                    uid, record = parsed
                    available_files[uid] = record
        finally:
            handle.close()
    except EnvironmentError:
        sys.stderr.write("It appears that the configuration file for this tool is missing.\n")
    return available_files


def _copy_first_available(records, out_path):
    """Copy the first record whose file can be copied to ``out_path``.

    ``records`` is an iterator; it is advanced past every record that was
    tried, so the caller can keep iterating it to get the remaining records.
    Returns the record that was copied, or ``None`` if none could be copied.
    """
    for record in records:
        try:
            copyfile(record[1], out_path)
        except EnvironmentError:
            sys.stderr.write("The file specified is missing.\n")
            continue
        return record
    return None


def main(argv=None):
    """Import locally stored data as a new dataset.

    ``argv`` defaults to ``sys.argv`` and is read as: ``argv[1]`` a
    comma-separated list of uids, ``argv[2]`` the output file, and
    ``argv[-1]`` the configuration file listing the available data.

    The first requested file that can be copied is written to the output
    file and announced on stdout with a ``#File1`` line; every remaining
    requested file is announced with a ``#NewFile`` line.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit("Usage: import id outputfile")

    uids = _parse_uids(argv[1])
    out_file1 = argv[2]
    available_files = _load_available_files(argv[-1])

    # Requested uids that are not in the configuration file are skipped.
    desired_files = iter([available_files[uid] for uid in uids if uid in available_files])

    # Copy first file to contents of given output file.
    first_file = _copy_first_available(desired_files, out_file1)
    if first_file is None:
        sys.stderr.write("There were no valid files requested.\n")
        # Exit status deliberately left as in the original script.
        sys.exit()
    file1_desc, file1_path, file1_build, file1_type, file1_chr_acc = first_file
    sys.stdout.write("\t".join(["#File1", file1_desc, file1_chr_acc, file1_build, file1_type]) + "\n")

    # Tell post-process filter where remaining files reside.
    for file_desc, file_path, file_build, file_type, file_chr_acc in desired_files:
        sys.stdout.write("\t".join(["#NewFile", file_desc, file_chr_acc, file_build, file_path, file_type]) + "\n")


if __name__ == "__main__":
    main()