"""Split the sample names in an expression-matrix header into normal and tumor lists.

Only the first line of the tab-separated input matrix is read.  Its first
column is skipped and every remaining column name is tested against the two
TCGA barcode patterns below.  Matching names are written, one per line, to
the normal-sample and tumor-sample output files.

Usage:
    script --in_matrix MATRIX --out_normals NORMALS --out_tumor TUMORS

The code deliberately avoids Python-3-only syntax so that it runs unchanged
under both Python 2.6+ and Python 3.
"""
import optparse
import re

# Example barcode: TCGA-A2-A0D2-01A-21R-A034-07
# The fourth dash-separated field decides the class: a leading "1" is treated
# as a normal sample, a leading "0" as a tumor sample.  Raw strings keep the
# regex escapes (\w, \d) from being interpreted as string escapes.
normal_sample_re = re.compile(r'TCGA-\w\w-\w\w\w\w-1\d\w-.*')
tumor_sample_re = re.compile(r"TCGA-\w\w-\w\w\w\w-0\d\w-.*")


def _build_parser():
    """Return the command-line parser with the three file-name options."""
    parser = optparse.OptionParser()
    parser.add_option("--in_matrix", dest="in_matrix", action="store", default="", help="")
    parser.add_option("--out_normals", dest="out_normals", action="store", default="", help="")
    parser.add_option("--out_tumor", dest="out_tumor", action="store", default="", help="")
    return parser


def _split_header(header_line):
    """Split the header line into its tab-separated column names.

    Only trailing whitespace (the newline) is removed.  Stripping leading
    whitespace as well would delete the tab that follows an empty first
    cell, shifting every column left so that the first real sample would be
    mistaken for the label column and dropped.
    """
    return header_line.rstrip().split("\t")


def _classify_samples(column_names):
    """Return ``(normal_samples, tumor_samples)`` for the header columns.

    The first column is skipped.  Names matching neither pattern are ignored.
    Input order is preserved in both lists.
    """
    normal_samples = []
    tumor_samples = []
    for name in column_names[1:]:
        if normal_sample_re.match(name):
            normal_samples.append(name)
        if tumor_sample_re.match(name):
            tumor_samples.append(name)
    return normal_samples, tumor_samples


def _read_header_samples(matrix_path):
    """Read the first line of *matrix_path* and classify its sample names."""
    with open(matrix_path, 'r') as expression_file:
        # Only the header is needed; an empty file yields "" and no samples.
        header_line = expression_file.readline()
    return _classify_samples(_split_header(header_line))


def _write_sample_list(output_path, samples):
    """Write *samples* to *output_path*, one per line.

    A trailing newline is always written, so an empty list produces a file
    containing a single newline (the same bytes ``print`` used to emit).
    """
    with open(output_path, 'w') as output_file:
        output_file.write("\n".join(samples) + "\n")


def main(argv=None):
    """Parse the options, classify the header samples and write both lists.

    *argv* is the list of command-line arguments without the program name;
    ``None`` means ``sys.argv[1:]``.  Exits with a usage error when any of
    the three file options is missing.
    """
    parser = _build_parser()
    opts, _args = parser.parse_args(argv)

    # Fail early with a usage message instead of an obscure open("") error.
    if not (opts.in_matrix and opts.out_normals and opts.out_tumor):
        parser.error("--in_matrix, --out_normals and --out_tumor are all required")

    normal_samples, tumor_samples = _read_header_samples(opts.in_matrix)
    _write_sample_list(opts.out_normals, normal_samples)
    _write_sample_list(opts.out_tumor, tumor_samples)


if __name__ == "__main__":
    main()