Mercurial > repos > prog > lcmsmatching
comparison MassdbConn.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
| author | prog |
|---|---|
| date | Thu, 02 Mar 2017 08:55:00 -0500 |
| parents | 253d531a0193 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:253d531a0193 | 2:20d69a062da3 |
|---|---|
| 1 if ( ! exists('MassdbConn')) { | 1 ##################### |
| 2 # CLASS DECLARATION # | |
| 3 ##################### | |
| 2 | 4 |
| 3 source('BiodbConn.R') | 5 MassdbConn <- methods::setRefClass("MassdbConn", contains = "BiodbConn") |
| 4 | 6 |
| 5 ##################### | 7 ############################### |
| 6 # CLASS DECLARATION # | 8 # GET CHROMATOGRAPHIC COLUMNS # |
| 7 ##################### | 9 ############################### |
| 10 | |
| 11 # Get a list of chromatographic columns contained in this database. | |
| 12 # compound.ids A list of compound IDs used to filter results. | |
| 13 # The returned value is a data.frame with two columns : one for the ID (BIODB.ID) and another one for the title (BIODB.TITLE). | |
| 14 MassdbConn$methods( getChromCol = function(compound.ids = NULL) { | |
| 15 stop("Method getChromCol() is not implemented in concrete class.") | |
| 16 }) | |
| 17 | |
| 18 ################# | |
| 19 # GET MZ VALUES # | |
| 20 ################# | |
| 21 | |
| 22 # Returns a numeric vector of all masses stored inside the database. | |
| 23 MassdbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { | |
| 24 stop("Method getMzValues() not implemented in concrete class.") | |
| 25 }) | |
| 26 | |
| 27 ################ | |
| 28 # GET NB PEAKS # | |
| 29 ################ | |
| 30 | |
| 31 # Returns the number of peaks contained in the database | |
| 32 MassdbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) { | |
| 33 stop("Method getNbPeaks() not implemented in concrete class.") | |
| 34 }) | |
| 35 | |
| 36 ######################### | |
| 37 # FIND COMPOUND BY NAME # | |
| 38 ######################### | |
| 39 | |
| 40 # Find a molecule by name | |
| 41 # name A vector of molecule names to search for. | |
| 42 # Return an integer vector of the same size as the name input vector, containing the found molecule IDs, in the same order. | |
| 43 MassdbConn$methods( findCompoundByName = function(name) { | |
| 44 stop("Method findCompoundByName() not implemented in concrete class.") | |
| 45 }) | |
| 46 | |
| 47 #################################### | |
| 48 # FIND SPECTRA IN GIVEN MASS RANGE # | |
| 49 #################################### | |
| 50 # Find spectra in the given mass range. | |
| 51 # rtype The type of return value: "objects", "dfspecs" (a data.frame of spectra) or "dfpeaks" (a data.frame of peaks). | |
| 52 MassdbConn$methods( searchMzRange = function(mzmin, mzmax, rtype = c("objects","dfspecs","dfpeaks")){ | |
| 53 stop("Method searchMzRange() not implemented in concrete class.") | |
| 54 }) | |
| 55 | |
| 56 ######################################## | |
| 57 # FIND SPECTRA WITHIN A MASS TOLERANCE # | |
| 58 ######################################## | |
| 59 MassdbConn$methods( searchMzTol = function(mz, tol, tolunit=BIODB.MZTOLUNIT.PLAIN, rtype = c("objects","dfspecs","dfpeaks")){ | |
| 60 stop("Method searchMzTol() not implemented in concrete class.") | |
| 61 }) | |
| 62 | |
| 63 #################################################### | |
| 64 # FIND MOLECULES WITH PRECURSOR WITHIN A TOLERANCE # | |
| 65 #################################################### | |
| 66 MassdbConn$methods( searchSpecPrecTol = function(mz, tol, tolunit=BIODB.MZTOLUNIT.PLAIN, mode = NULL){ | |
| 67 stop("Method searchSpecPrecTol not implemented in concrete class.") | |
| 68 }) | |
| 69 | |
| 70 ################################# | |
| 71 #perform a database MS-MS search# | |
| 72 ################################# | |
| 73 | |
| 74 ### spec : the spectrum to match against the database. | |
| 75 ### precursor : the mass/charge of the precursor to be looked for. | |
| 76 ### mztol : the size of the window around the precursor to be looked for. | |
| 77 ### ppm : the matching ppm tolerance. | |
| 78 ### fun : the spectrum comparison function passed on to compareSpectra (defaults to BIODB.MSMS.DIST.WCOSINE). | |
| 79 ### dmz : the mass tolerance is taken as the minimum between this quantity and the ppm. | |
| 80 ### npmin : the minimum number of peaks to detect a match (2 recommended) | |
| 81 | |
| 82 MassdbConn$methods( msmsSearch = function(spec, precursor, mztol, tolunit, | |
| 83 ppm, fun = BIODB.MSMS.DIST.WCOSINE, | |
| 84 params = list(), npmin=2, dmz = 0.001, | |
| 85 mode = BIODB.MSMODE.POS, return.ids.only = TRUE){ | |
| 86 | |
| 8 | 87 |
| 9 MassdbConn <- setRefClass("MassdbConn", contains = "BiodbConn") | 88 # TODO replace by msms precursor search when available. |
| 89 lspec <- .self$searchSpecPrecTol( precursor, mztol, BIODB.MZTOLUNIT.PLAIN, mode = mode) | |
| 90 rspec <- lapply(lspec,function(x){ | |
| 91 peaks <- x$getFieldValue(BIODB.PEAKS) | |
| 92 | |
| 93 ####Getting the correct fields | |
| 94 vcomp <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.INTENSITY) | |
| 95 | |
| 96 foundfields <- vcomp %in% colnames(peaks) | |
| 97 if(sum(foundfields ) < 2){ | |
| 98 stop(paste0("fields can't be coerced to mz and intensity : ",colnames(peaks))) | |
| 99 } | |
| 100 | |
| 101 peaks <- peaks[ , vcomp[which( foundfields ) ] ] | |
| 102 | |
| 103 peaks | |
| 104 }) | |
| 105 | |
| 106 # TODO Import compareSpectra into biodb and put it inside massdb-helper.R or hide it as a private method. | |
| 107 res <- compareSpectra(spec, rspec, npmin = npmin, fun = fun, params = params) | |
| 108 | |
| 109 if(is.null(res)) return(NULL) # To decide at MassdbConn level: return empty list (or empty data frame) or NULL. | |
| 110 ###Adding the matched peaks and the similarity values to spectra. | |
| 111 | |
| 112 lret <-vector(length(lspec),mode = "list") | |
| 113 vsimilarity <- numeric( length( lspec ) ) | |
| 114 vmatched <- vector( mode = "list", length( lspec ) ) | |
| 115 | |
| 116 if( return.ids.only ){ | |
| 117 lret <- sapply( lspec, function( x ) { | |
| 118 x$getFieldValue( BIODB.ACCESSION ) | |
| 119 }) | |
| 120 }else{ | |
| 121 ###TODO implement three types of return. | |
| 122 lret <- lspec | |
| 123 } | |
| 124 | |
| 125 ###Reordering the list. | |
| 126 lret <- lret[ res$ord ] | |
| 127 | |
| 10 | 128 |
| 11 ############################### | 129 return( list(measure = res$similarity[ res$ord ], matchedpeaks = res$matched [ res$ord ], id = lret)) |
| 12 # GET CHROMATOGRAPHIC COLUMNS # | 130 }) |
| 13 ############################### | |
| 14 | |
| 15 # Get a list of chromatographic columns contained in this database. | |
| 16 # compound.ids A list of compound IDs used to filter results. | |
| 17 # The returned value is a data.frame with two columns : one for the ID (BIODB.ID) and another one for the title (BIODB.TITLE). | |
| 18 MassdbConn$methods( getChromCol = function(compound.ids = NULL) { | |
| 19 stop("Method getChromCol() is not implemented in concrete class.") | |
| 20 }) | |
| 21 | |
| 22 ################# | |
| 23 # GET MZ VALUES # | |
| 24 ################# | |
| 25 | |
| 26 # Returns a numeric vector of all masses stored inside the database. | |
| 27 MassdbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { | |
| 28 stop("Method getMzValues() not implemented in concrete class.") | |
| 29 }) | |
| 30 | |
| 31 } |
