Mercurial > repos > vipints > rdiff
comparison rDiff/mex/read.cpp @ 0:0f80a5141704
version 0.3 uploaded
| author | vipints | 
|---|---|
| date | Thu, 14 Feb 2013 23:38:36 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:0f80a5141704 | 
|---|---|
| 1 /* | |
| 2 * This program is free software; you can redistribute it and/or modify | |
| 3 * it under the terms of the GNU General Public License as published by | |
| 4 * the Free Software Foundation; either version 3 of the License, or | |
| 5 * (at your option) any later version. | |
| 6 * | |
| 7 * Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch | |
| 8 * Copyright (C) 2010-2011 Max Planck Society | |
| 9 */ | |
| 10 | |
| 11 | |
| 12 #include "read.h" | |
| 13 | |
| 14 CRead::CRead() { | |
| 15 read_id = NULL; | |
| 16 sam_line = NULL; | |
| 17 start_pos = 0; | |
| 18 matches = 0; | |
| 19 mismatches = 0; | |
| 20 multiple_alignment_index = 0; | |
| 21 strand = NULL; | |
| 22 left = false; | |
| 23 right = false; | |
| 24 reverse = false; | |
| 25 } | |
| 26 | |
| 27 CRead::~CRead() { | |
| 28 delete[] read_id; | |
| 29 delete[] sam_line; | |
| 30 delete[] strand; | |
| 31 } | |
| 32 | |
| 33 /* | |
| 34 * Augments 'coverage' array at the positions covered by the read in the queried interval. | |
| 35 */ | |
| 36 void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage) | |
| 37 { | |
| 38 // block1 block2 | |
| 39 // |=====|======|============|===========|======|====| | |
| 40 // ^ ^ ^ | |
| 41 // p_start_pos | p_end_pos | |
| 42 // start_pos | |
| 43 // |0000001111111111111000000000000111111100000| | |
| 44 // *coverage | |
| 45 int len = p_end_pos-p_start_pos+1; | |
| 46 for (uint32_t n = 0; n < block_starts.size(); n++) { | |
| 47 int32_t from, to; | |
| 48 from = block_starts[n]+start_pos-p_start_pos; | |
| 49 to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; | |
| 50 if (from < 0) | |
| 51 from = 0; | |
| 52 if (to < 0) | |
| 53 continue; | |
| 54 else if (to > len) | |
| 55 to = len; | |
| 56 for (int bp=from; bp<to; bp++) { | |
| 57 coverage[bp]++; | |
| 58 } | |
| 59 } | |
| 60 } | |
| 61 int CRead::get_last_position() | |
| 62 { | |
| 63 if (block_starts.size()>0) // this if for some reason zero in case of softclips | |
| 64 return start_pos+block_starts.back()+block_lengths.back(); | |
| 65 return -1; | |
| 66 } | |
| 67 | |
| 68 /* | |
| 69 * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read). | |
| 70 * These indices can be used to build up a sparse matrix of reads x positions. | |
| 71 */ | |
| 72 void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) { | |
| 73 int len = p_end_pos-p_start_pos+1; | |
| 74 for (uint32_t n = 0; n < block_starts.size(); n++) { | |
| 75 uint32_t from, to; | |
| 76 if (block_starts[n]+start_pos-p_start_pos >= 0) | |
| 77 from = block_starts[n]+start_pos-p_start_pos; | |
| 78 else | |
| 79 from = 0; | |
| 80 if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0) | |
| 81 to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; | |
| 82 else | |
| 83 to = 0; | |
| 84 for (int bp=from; bp<to&bp<len; bp++) { | |
| 85 reads[reads_c] = row_idx+1; // row indices for sparse matrix | |
| 86 reads[reads_c+1] = bp+1; // column indices for sparse matrix | |
| 87 reads_c += 2; | |
| 88 } | |
| 89 } | |
| 90 } | |
| 91 | |
| 92 void CRead::get_acc_splice_sites(vector<int>* acc_pos) | |
| 93 { | |
| 94 if (strand[0]=='+') | |
| 95 { | |
| 96 for (int k=1;k<block_starts.size(); k++) | |
| 97 acc_pos->push_back(start_pos+block_starts[k]-1); | |
| 98 } | |
| 99 else if (strand[0]=='-') | |
| 100 { | |
| 101 for (int k=1;k<block_starts.size(); k++) | |
| 102 acc_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 void CRead::get_don_splice_sites(vector<int>* don_pos) | |
| 107 { | |
| 108 | |
| 109 if (strand[0]=='+') | |
| 110 { | |
| 111 for (int k=1;k<block_starts.size(); k++) | |
| 112 don_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); | |
| 113 } | |
| 114 else if (strand[0]=='-') | |
| 115 { | |
| 116 for (int k=1;k<block_starts.size(); k++) | |
| 117 don_pos->push_back(start_pos+block_starts[k]-1); | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 int CRead::min_exon_len() | |
| 122 { | |
| 123 int min = 1e8; | |
| 124 for (int k=0;k<block_starts.size(); k++) | |
| 125 if (block_lengths[k]<min) | |
| 126 min = block_lengths[k]; | |
| 127 return min; | |
| 128 } | |
| 129 | |
| 130 int CRead::max_intron_len() | |
| 131 { | |
| 132 int max = 0; | |
| 133 for (int k=1;k<block_starts.size(); k++) | |
| 134 if (block_starts[k]-(block_starts[k-1]+block_lengths[k-1])>max) | |
| 135 max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]); | |
| 136 return max; | |
| 137 } | |
| 138 | |
| 139 /* | |
| 140 * Adds start and end of introns in the read consecutively to the 'introns' vector. | |
| 141 */ | |
| 142 void CRead::get_introns(vector<int>* introns) | |
| 143 { | |
| 144 for (int i=1; i<block_starts.size(); i++) | |
| 145 { | |
| 146 int istart = block_starts[i-1]+block_lengths[i-1]+start_pos; | |
| 147 int iend = block_starts[i]+start_pos-1; | |
| 148 introns->push_back(istart); | |
| 149 introns->push_back(iend); | |
| 150 //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend); | |
| 151 } | |
| 152 } | |
| 153 void CRead::get_introns(vector<uint32_t>* intron_starts, vector<uint32_t>* intron_ends, vector<uint32_t>* block_len1, vector<uint32_t>* block_len2) | |
| 154 { | |
| 155 for (int i=1; i<block_starts.size(); i++) | |
| 156 { | |
| 157 uint32_t istart = block_starts[i-1]+block_lengths[i-1]+start_pos; | |
| 158 uint32_t iend = block_starts[i]+start_pos-1; | |
| 159 intron_starts->push_back(istart); | |
| 160 intron_ends->push_back(iend); | |
| 161 block_len1->push_back(block_lengths[i-1]) ; | |
| 162 block_len2->push_back(block_lengths[i]) ; | |
| 163 } | |
| 164 } | |
| 165 | |
| 166 bool CRead::operator==(const CRead& read) const | |
| 167 { | |
| 168 if (block_starts.size()!=read.block_starts.size()) | |
| 169 return false; | |
| 170 if (block_lengths.size()!=read.block_lengths.size()) | |
| 171 return false; | |
| 172 if (start_pos!=read.start_pos) | |
| 173 return false; | |
| 174 if (strand[0] != read.strand[0]) | |
| 175 return false; | |
| 176 for (int i=0; i<block_starts.size(); i++) | |
| 177 if (block_starts[i]!=read.block_starts[i]) | |
| 178 return false; | |
| 179 for (int i=0; i<block_lengths.size(); i++) | |
| 180 if (block_lengths[i]!=read.block_lengths[i]) | |
| 181 return false; | |
| 182 return true; | |
| 183 } | |
| 184 | |
| 185 void CRead::print() | |
| 186 { | |
| 187 fprintf(stdout, "start_pos: %d\n", start_pos); | |
| 188 fprintf(stdout, "starts:"); | |
| 189 for (int i=0; i<block_starts.size(); i++) | |
| 190 { | |
| 191 fprintf(stdout, " %d", block_starts[i]); | |
| 192 } | |
| 193 fprintf(stdout, "\n"); | |
| 194 | |
| 195 fprintf(stdout, "lengths:"); | |
| 196 for (int i=0; i<block_starts.size(); i++) | |
| 197 { | |
| 198 fprintf(stdout, " %d", block_lengths[i]); | |
| 199 } | |
| 200 fprintf(stdout, "\n"); | |
| 201 } | |
| 202 | |
| 203 void CRead::set_strand(char s) | |
| 204 { | |
| 205 delete[] strand; | |
| 206 strand = new char [2]; | |
| 207 strand[0] = s; | |
| 208 strand[1] = '0'; | |
| 209 } | |
| 210 | |
| 211 int CRead::get_mismatches() | |
| 212 { | |
| 213 return mismatches ; | |
| 214 } | 
