annotate maaslin-4450aa4ecc84/src/lib/SummarizeMaaslin.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server
author george-weingart
date Sun, 08 Feb 2015 23:08:38 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
2 #Copyright (C) <2012>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
3 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
5 #this software and associated documentation files (the "Software"), to deal in the
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
6 #Software without restriction, including without limitation the rights to use, copy,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
8 #and to permit persons to whom the Software is furnished to do so, subject to
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
9 #the following conditions:
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
10 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
11 #The above copyright notice and this permission notice shall be included in all copies
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
12 #or substantial portions of the Software.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
13 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
20 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
22 # authored by the Huttenhower lab at the Harvard School of Public Health
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
24 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
25
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
inlinedocs <- function(
##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
##description<< Creates a summary of association detail files.
) {
  # Placeholder function whose only purpose appears to be carrying the
  # file-level documentation tags above. Its body simply evaluates to pArgs,
  # which is not defined in this file and must exist in an enclosing
  # environment if this function is ever called.
  pArgs
}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
30
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
31 #Logging class
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
32 suppressMessages(library(logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
33
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
# Module-level handle to the logger named "maaslin"; passed to the logging
# calls made by the functions in this file.
c_logrMaaslin <- getLogger(name = "maaslin")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
36
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
funcSummarizeDirectory = function(
### Summarizes the MaAsLin detail files into one file based on significance.
### Rows from every detail file whose astrKeyword column is at or below
### afSignificanceLevel are combined, written to astrSummaryFileName, and
### returned ordered by P.value. Returns NULL (and writes a short message to the
### summary file) when no detail files are found.
astrOutputDirectory,
### The output directory to find the MaAsLin results.
strBaseName,
### The prefix string used in maaslin to start the detail files.
astrSummaryFileName,
### Path of the summary file to write. Any existing file at this path is removed first.
astrKeyword,
### The column name of the data to check significance before adding a detail to the summary.
### Must be a single name that exists in every detail file (as read by read.table).
afSignificanceLevel
### The value of significance the data must be at or below to be included in the summary (0.0 is most significant; like p-values)
){
  #Store significant data elements
  dfSignificantData = NULL

  #Get detail files in output directory
  strDetailPattern = paste(strBaseName,"-","[[:print:]]*",c_sDetailFileSuffix,sep="")
  astrlsDetailFiles = list.files(astrOutputDirectory, pattern=strDetailPattern, full.names=TRUE)
  logdebug(format(astrlsDetailFiles),c_logrMaaslin)

  #For every detail file found
  for(astrFile in astrlsDetailFiles)
  {
    #Read in data
    dfDetails = read.table(astrFile, header=TRUE, sep=c_cTableDelimiter)

    #Fail with a clear message instead of the generic "undefined columns selected"
    #(or silently selecting NA rows when more than one name is supplied)
    if(length(astrKeyword) != 1 || !(astrKeyword %in% names(dfDetails)))
    {
      stop("Significance column '", paste(astrKeyword, collapse=", "),
        "' must be a single column present in detail file: ", astrFile, call.=FALSE)
    }

    #Reduce to significance; which() drops rows whose value is NA
    dfDetails = dfDetails[which(dfDetails[[astrKeyword]] <= afSignificanceLevel),]

    #Combine with other data if it exists
    if(is.null(dfSignificantData))
    {
      dfSignificantData = dfDetails
    } else {
      dfSignificantData = rbind(dfSignificantData,dfDetails)
    }
  }

  #Write data to file, replacing any previous summary
  unlink(astrSummaryFileName)
  if(is.null(dfSignificantData))
  {
    #NOTE(review): this branch is only reached when no detail files matched.
    #If files exist but no row passes the threshold, a zero-row table is written
    #and returned instead -- confirm whether that is intended.
    funcWrite("No significant data found.",astrSummaryFileName)
    return( NULL )
  }

  #Sort by the Value column and then by P.value for the file on disk
  dfSignificantData = dfSignificantData[order(dfSignificantData$Value, dfSignificantData$P.value, decreasing = FALSE),]
  funcWriteTable( dfSignificantData, astrSummaryFileName, fAppend = FALSE )

  #Return the same rows ordered by P.value alone
  return( dfSignificantData[ order( dfSignificantData$P.value, decreasing = FALSE ), ] )
}