annotate dexseq-hts_1.0/src/dexseq_count.py @ 11:cec4b4fb30be draft default tip

DEXSeq version 1.6 added
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 08:22:45 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
1 import sys, itertools, optparse
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
2
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
3 optParser = optparse.OptionParser(
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
4
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
5 usage = "python %prog [options] <flattened_gff_file> <sam_file> <output_file>",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
6
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
7 description=
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
8 "This script counts how many reads in <sam_file> fall onto each exonic " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
9 "part given in <flattened_gff_file> and outputs a list of counts in " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
10 "<output_file>, for further analysis with the DEXSeq Bioconductor package. " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
11 "(Notes: The <flattened_gff_file> should be produced with the script " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
12 "dexseq_prepare_annotation.py). <sam_file> may be '-' to indicate standard input.",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
13
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
14 epilog =
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
15 "Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
16 "Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
17 "Public License v3. Part of the 'DEXSeq' package." )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
18
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
19 optParser.add_option( "-p", "--paired", type="choice", dest="paired",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
20 choices = ( "no", "yes" ), default = "no",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
21 help = "'yes' or 'no'. Indicates whether the data is paired-end (default: no)" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
22
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
23 optParser.add_option( "-s", "--stranded", type="choice", dest="stranded",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
24 choices = ( "yes", "no", "reverse" ), default = "yes",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
25 help = "'yes', 'no', or 'reverse'. Indicates whether the data is " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
26 "from a strand-specific assay (default: yes ). " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
27 "Be sure to switch to 'no' if you use a non strand-specific RNA-Seq library " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
28 "preparation protocol. 'reverse' inverts strands and is neede for certain " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
29 "protocols, e.g. paired-end with circularization." )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
30
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
31 optParser.add_option( "-a", "--minaqual", type="int", dest="minaqual",
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
32 default = 10,
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
33 help = "skip all reads with alignment quality lower than the given " +
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
34 "minimum value (default: 10)" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
35
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
36 if len( sys.argv ) == 1:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
37 optParser.print_help()
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
38 sys.exit(1)
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
39
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
40 (opts, args) = optParser.parse_args()
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
41
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
42 if len( args ) != 3:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
43 sys.stderr.write( sys.argv[0] + ": Error: Please provide three arguments.\n" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
44 sys.stderr.write( " Call with '-h' to get usage information.\n" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
45 sys.exit( 1 )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
46
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
47 try:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
48 import HTSeq
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
49 except ImportError:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
50 sys.stderr.write( "Could not import HTSeq. Please install the HTSeq Python framework\n" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
51 sys.stderr.write( "available from http://www-huber.embl.de/users/anders/HTSeq\n" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
52 sys.exit(1)
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
53
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
54 gff_file = args[0]
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
55 sam_file = args[1]
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
56 out_file = args[2]
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
57 stranded = opts.stranded == "yes" or opts.stranded == "reverse"
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
58 reverse = opts.stranded == "reverse"
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
59 is_PE = opts.paired == "yes"
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
60 minaqual = opts.minaqual
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
61
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
62 if sam_file == "-":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
63 sam_file = sys.stdin
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
64
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
65 # Step 1: Read in the GFF file as generated by aggregate_genes.py
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
66 # and put everything into a GenomicArrayOfSets
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
67
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
68 features = HTSeq.GenomicArrayOfSets( "auto", stranded=stranded )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
69 for f in HTSeq.GFF_Reader( gff_file ):
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
70 if f.type == "exonic_part":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
71 f.name = f.attr['gene_id'] + ":" + f.attr['exonic_part_number']
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
72 features[f.iv] += f
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
73
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
74 # initialise counters
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
75 num_reads = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
76 counts = {}
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
77 counts[ '_empty' ] = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
78 counts[ '_ambiguous' ] = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
79 counts[ '_lowaqual' ] = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
80 counts[ '_notaligned' ] = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
81
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
82 # put a zero for each feature ID
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
83 for iv, s in features.steps():
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
84 for f in s:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
85 counts[ f.name ] = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
86
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
87 #We need this little helper below:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
88 def reverse_strand( s ):
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
89 if s == "+":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
90 return "-"
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
91 elif s == "-":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
92 return "+"
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
93 else:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
94 raise SystemError, "illegal strand"
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
95
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
96 # Now go through the aligned reads
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
97
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
98 if not is_PE:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
99
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
100 num_reads = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
101 for a in HTSeq.SAM_Reader( sam_file ):
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
102 if not a.aligned:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
103 counts[ '_notaligned' ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
104 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
105 if a.aQual < minaqual:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
106 counts[ '_lowaqual' ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
107 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
108 rs = set()
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
109 for cigop in a.cigar:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
110 if cigop.type != "M":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
111 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
112 if reverse:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
113 cigop.ref_iv.strand = reverse_strand( cigop.ref_iv.strand )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
114 for iv, s in features[cigop.ref_iv].steps( ):
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
115 rs = rs.union( s )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
116 set_of_gene_names = set( [ f.name.split(":")[0] for f in rs ] )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
117 if len( set_of_gene_names ) == 0:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
118 counts[ '_empty' ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
119 elif len( set_of_gene_names ) > 1:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
120 counts[ '_ambiguous' ] +=1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
121 else:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
122 for f in rs:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
123 counts[ f.name ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
124 num_reads += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
125 if num_reads % 100000 == 0:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
126 sys.stdout.write( "%d reads processed.\n" % num_reads )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
127
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
128 else: # paired-end
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
129
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
130 num_reads = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
131 for af, ar in HTSeq.pair_SAM_alignments( HTSeq.SAM_Reader( sam_file ) ):
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
132 rs = set()
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
133 if af and ar and not af.aligned and not ar.aligned:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
134 counts[ '_notaligned' ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
135 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
136 if af and ar and not af.aQual < minaqual and ar.aQual < minaqual:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
137 counts[ '_lowaqual' ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
138 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
139 if af and af.aligned and af.aQual >= minaqual and af.iv.chrom in features.chrom_vectors.keys():
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
140 for cigop in af.cigar:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
141 if cigop.type != "M":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
142 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
143 if reverse:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
144 cigop.ref_iv.strand = reverse_strand( cigop.ref_iv.strand )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
145 for iv, s in features[cigop.ref_iv].steps():
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
146 rs = rs.union( s )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
147 if ar and ar.aligned and ar.aQual >= minaqual and ar.iv.chrom in features.chrom_vectors.keys():
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
148 for cigop in ar.cigar:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
149 if cigop.type != "M":
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
150 continue
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
151 if not reverse:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
152 cigop.ref_iv.strand = reverse_strand( cigop.ref_iv.strand )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
153 for iv, s in features[cigop.ref_iv].steps():
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
154 rs = rs.union( s )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
155 set_of_gene_names = set( [ f.name.split(":")[0] for f in rs ] )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
156 if len( set_of_gene_names ) == 0:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
157 counts[ '_empty' ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
158 elif len( set_of_gene_names ) > 1:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
159 counts[ '_ambiguous' ] = 0
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
160 else:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
161 for f in rs:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
162 counts[ f.name ] += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
163 num_reads += 1
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
164 if num_reads % 100000 == 0:
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
165 sys.stderr.write( "%d reads processed.\n" % num_reads )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
166
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
167
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
168 # Step 3: Write out the results
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
169
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
170 fout = open( out_file, "w" )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
171 for fn in sorted( counts.keys() ):
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
172 fout.write( "%s\t%d\n" % ( fn, counts[fn] ) )
cec4b4fb30be DEXSeq version 1.6 added
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
173 fout.close()