Mercurial > repos > ynewton > extract_tumor_vs_normal_tcga_ids
comparison get_normal_vs_tumor_sample_ids.py @ 0:72b0123a8587 draft
Uploaded
author | ynewton |
---|---|
date | Thu, 17 Jan 2013 20:07:57 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:72b0123a8587 |
---|---|
import optparse
import re

# Sample identifiers are expected to look like TCGA-A2-A0D2-01A-21R-A034-07.
# The first digit of the fourth dash-separated field decides the class used
# by this script: "0" -> tumor, "1" -> normal. Raw strings keep \w and \d as
# regex escapes instead of (invalid) string escapes.
normal_sample_re = re.compile(r'TCGA-\w\w-\w\w\w\w-1\d\w-.*')
tumor_sample_re = re.compile(r"TCGA-\w\w-\w\w\w\w-0\d\w-.*")


def read_header_fields(matrix_file_name):
    """Return the tab-separated fields of the first line of the matrix file.

    Only trailing whitespace (including the line terminator) is removed.
    Leading whitespace is kept on purpose: a header whose first cell is empty
    starts with a tab, and stripping it would shift every sample id one
    column to the left, so the first sample would be mistaken for the
    row-label column and dropped.

    An empty file yields [""].
    """
    with open(matrix_file_name, 'r') as matrix_file:
        header_line = matrix_file.readline()
    return header_line.rstrip().split("\t")


def split_sample_ids(header_fields):
    """Split header fields into (normal_samples, tumor_samples).

    The first field is the row-label column and is always skipped. Fields
    matching neither pattern are ignored. Input order is preserved.
    """
    sample_ids = header_fields[1:]
    normal_samples = [s for s in sample_ids if normal_sample_re.match(s)]
    tumor_samples = [s for s in sample_ids if tumor_sample_re.match(s)]
    return normal_samples, tumor_samples


def write_sample_ids(output_file_name, sample_ids):
    """Write one sample id per line, followed by a final newline.

    An empty list still produces a single newline, matching what the
    original print statement emitted.
    """
    with open(output_file_name, 'w') as output_file:
        output_file.write("\n".join(sample_ids) + "\n")


def main(argv=None):
    """Parse the command line and write the normal and tumor sample id lists.

    argv: list of command-line arguments; None means sys.argv[1:].
    """
    parser = optparse.OptionParser()
    parser.add_option("--in_matrix", dest="in_matrix", action="store", default="", help="")
    parser.add_option("--out_normals", dest="out_normals", action="store", default="", help="")
    parser.add_option("--out_tumor", dest="out_tumor", action="store", default="", help="")
    opts, args = parser.parse_args(argv)

    # Only the header line is needed; the data rows are never read.
    header_fields = read_header_fields(opts.in_matrix)
    normal_samples, tumor_samples = split_sample_ids(header_fields)

    write_sample_ids(opts.out_normals, normal_samples)
    write_sample_ids(opts.out_tumor, tumor_samples)


if __name__ == "__main__":
    main()