Mercurial > repos > vipints > rdiff
comparison rDiff/src/get_reads_caller.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 function [COUNTS]=get_reads_caller(PAR) | |
2 | |
3 CFG = PAR.CFG; | |
4 genes = PAR.genes; | |
5 clear PAR; | |
6 | |
7 % add paths | |
8 addpath(CFG.paths); | |
9 | |
10 data_dir=CFG.data_dir; | |
11 OUT_STR=[]; | |
12 | |
13 COUNTS=cell(size(genes,2),1); | |
14 | |
15 | |
16 % NON_PARAM_SAMPLE contains the read start density | |
17 if CFG.perform_nonparametric | |
18 NON_PARAM_SAMPLE=sparse([],[],[],10000,1,5000); | |
19 end | |
20 | |
21 | |
22 for i=1:size(genes,2) | |
23 | |
24 | |
25 | |
26 %TEMP_COUNT contins the counts for the current gene | |
27 TEMP_COUNT=cell(1,7); | |
28 gene = genes(i); | |
29 | |
30 %set default return values | |
31 TEMP_COUNT{1}=gene.name; | |
32 TEMP_COUNT{2}=[]; | |
33 TEMP_COUNT{3}=[]; | |
34 TEMP_COUNT{4}=[]; | |
35 TEMP_COUNT{5}=[]; | |
36 TEMP_COUNT{6}=[]; | |
37 | |
38 %check that the gene has exons defined | |
39 if isempty(gene.exons) | |
40 STAT{i}=TEMP_COUNT; | |
41 continue; | |
42 end | |
43 | |
44 %check that the gene is longer than the Reads. Otherwise the | |
45 %definition of regions does not makes sense | |
46 if gene.stop-gene.start<CFG.sequenced_length+3 | |
47 STAT{i}=TEMP_COUNT; | |
48 continue; | |
49 end | |
50 | |
51 %Get the reads from gene | |
52 [reads] = get_reads_for_gene(CFG,gene); | |
53 | |
54 | |
55 %Get total number of reads | |
56 NR_OF_READS=size(reads,1); | |
57 TEMP_COUNT{2}=NR_OF_READS; | |
58 | |
59 | |
60 NR_OF_TRANS=size(gene.transcripts,2); | |
61 %check that the gene has more than one isoform | |
62 if NR_OF_TRANS<=1 | |
63 STAT{i}=TEMP_COUNT; | |
64 continue; | |
65 end | |
66 | |
67 | |
68 EXON_IDX=sum(gene.exonsequence,1)<NR_OF_TRANS; | |
69 | |
70 %Transform the reads in to counts per region | |
71 [NEW_READS,UNEXPLAINED_READS,UNEXPLAINED_INDEX]= convert_reads_to_region_indicators(reads,gene); | |
72 | |
73 if length(unique(sum(NEW_READS,2)))>1 | |
74 warning(['Assignment of reads to regions is not unique for gene:' gene.name '\n']); | |
75 end | |
76 | |
77 | |
78 %Calulate gene expression | |
79 %Get the non_unique_regions | |
80 NON_ALT_EIRS=sum(gene.eirs_in_seq,1)==NR_OF_TRANS; | |
81 TEMP_COUNT{3}=sum(sum(NEW_READS(:,NON_ALT_EIRS),2)>0); | |
82 | |
83 | |
84 %Get Counts for nonparametric variance function | |
85 if CFG.perform_nonparametric | |
86 %Get the read starts | |
87 [TEMP,START]=max(reads,[],2); | |
88 read_starts=sparse((1:NR_OF_READS)',START,ones(NR_OF_READS,1),NR_OF_READS,size(reads,2),NR_OF_READS); | |
89 | |
90 %Get the index of the alternative reads | |
91 ALT_READ_IX=zeros(size(reads,1),1); | |
92 ALT_EIRS=and(sum(gene.eirs_in_seq,1)<NR_OF_TRANS,sum(gene.eirs_in_seq,1)>0); | |
93 ALT_READ_IX(UNEXPLAINED_INDEX==0)=sum(NEW_READS(:,ALT_EIRS),2)>0; | |
94 | |
95 %Get the coverage of the read starts | |
96 %COVERAGE=sum(read_starts(find(ALT_READ_IX>0),:),1); | |
97 if CFG.only_gene_start | |
98 COVERAGE=sum(reads,1); | |
99 else | |
100 COVERAGE=sum(reads(find(ALT_READ_IX>0),:),1); | |
101 end | |
102 if max(COVERAGE)>0 | |
103 TEMP_COUNT{4}=COVERAGE; | |
104 else | |
105 TEMP_COUNT{4}=[]; | |
106 end | |
107 end | |
108 | |
109 | |
110 %Get counts for parametric settting | |
111 if or(CFG.perform_parametric,CFG.perform_poisson) | |
112 %Get the alternative reads | |
113 ALT_EIRS=and(sum(gene.eirs_in_seq,1)<NR_OF_TRANS,sum(gene.eirs_in_seq,1)>0); | |
114 | |
115 %Return the Counts in the alternative EIRS | |
116 | |
117 COUNTS_PER_EIR=sum(NEW_READS(:,ALT_EIRS),1); | |
118 EXS_SEQ=gene.eirs_in_seq(:,ALT_EIRS); | |
119 [NEWCOLS,IDX2,POS]=unique(EXS_SEQ','rows'); | |
120 NEWCOLS=NEWCOLS'; | |
121 EIR_COUNTS=zeros(1,length(IDX2)); | |
122 for j=1:max(POS) | |
123 EIR_COUNTS(j)=sum(COUNTS_PER_EIR(POS==j)); | |
124 end | |
125 TEMP_COUNT{6}=EIR_COUNTS; | |
126 end | |
127 | |
128 clear NEW_READS | |
129 clear reads; | |
130 COUNTS{i}=TEMP_COUNT; | |
131 | |
132 OLD_OUT_STR=OUT_STR; | |
133 OUT_STR=['Finished ' num2str(i) ' out of ' num2str(size(genes,2)) ' genes']; | |
134 %print progress | |
135 if CFG.use_rproc | |
136 fprintf([OUT_STR '\n']) | |
137 else | |
138 % Erase old progress | |
139 fprintf(repmat('\b',1,length(OLD_OUT_STR))); | |
140 fprintf([OUT_STR]) | |
141 end | |
142 | |
143 | |
144 end | |
145 fprintf('\n') | |
146 %Save the counts | |
147 OUT_FILENAME=[CFG.outfile_prefix]; | |
148 save(OUT_FILENAME,'COUNTS') | |
149 %Save the nonparametric histogram | |
150 |