Mercurial > repos > iuc > limma_voom
changeset 4:a61a6e62e91f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 6a458881c0819b75e55e64b3f494679d43bb9ee8
author | iuc |
---|---|
date | Sun, 29 Apr 2018 17:36:42 -0400 |
parents | 38aab66ae5cb |
children | d8a55b5f0de0 |
files | limma_voom.R limma_voom.xml test-data/anno.txt test-data/limma-trend_Mut-WT.tsv test-data/limma-voom_Mut-WT.tsv test-data/limma-voom_Mut-WT_2fact.tsv test-data/limma-voom_Mut-WT_2fact_anno.tsv test-data/limma-voom_Mut-WT_anno.tsv test-data/limma-voom_WT-Mut.tsv test-data/limma-voom_WT-Mut_2fact_anno.tsv test-data/limma-voom_normcounts.tsv test-data/limma-voom_normcounts_anno.tsv test-data/out_rscript.txt |
diffstat | 13 files changed, 1000 insertions(+), 118 deletions(-) [+] |
line wrap: on
line diff
--- a/limma_voom.R Wed Jan 31 12:45:42 2018 -0500 +++ b/limma_voom.R Sun Apr 29 17:36:42 2018 -0400 @@ -349,7 +349,9 @@ data <- list() data$counts <- counts if (haveAnno) { - data$genes <- geneanno + # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) + annoord <- geneanno[match(row.names(counts), geneanno[,1]), ] + data$genes <- annoord } else { data$genes <- data.frame(GeneID=row.names(counts)) } @@ -431,6 +433,7 @@ # limma-trend approach logCPM <- cpm(data, log=TRUE, prior.count=opt$trend) fit <- lmFit(logCPM, design) + fit$genes <- data$genes fit <- contrasts.fit(fit, contrasts) if (wantRobust) { fit <- eBayes(fit, trend=TRUE, robust=TRUE) @@ -459,7 +462,7 @@ # Save normalised counts (log2cpm) if (wantNorm) { - write.table(logCPM, file=normOut, row.names=TRUE, sep="\t") + write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE) linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv")))) } } else { @@ -510,7 +513,7 @@ # Save normalised counts (log2cpm) if (wantNorm) { norm_counts <- data.frame(vData$genes, vData$E) - write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t") + write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv")))) } @@ -554,11 +557,7 @@ # Write top expressions table top <- topTable(fit, coef=i, number=Inf, sort.by="P") - if (wantTrend) { - write.table(top, file=topOut[i], row.names=TRUE, sep="\t") - } else { - write.table(top, file=topOut[i], row.names=FALSE, sep="\t") - } + write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE) linkName <- paste0(deMethod, "_", contrastData[i], ".tsv") linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv") @@ -627,7 +626,7 @@ cata("<body>\n") cata("<h3>Limma Analysis Output:</h3>\n") -cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") +cata("Links to PDF copies of plots are in 'Plots' section below />\n") if (wantWeight) { HtmlImage(imageData$Link[1], imageData$Label[1], width=1000) } else {
--- a/limma_voom.xml Wed Jan 31 12:45:42 2018 -0500 +++ b/limma_voom.xml Sun Apr 29 17:36:42 2018 -0400 @@ -1,10 +1,10 @@ -<tool id="limma_voom" name="limma" version="3.34.6.0"> +<tool id="limma_voom" name="limma" version="3.34.9.0"> <description> Perform differential expression with limma-voom or limma-trend </description> <requirements> - <requirement type="package" version="3.34.6">bioconductor-limma</requirement> + <requirement type="package" version="3.34.9">bioconductor-limma</requirement> <requirement type="package" version="3.20.7">bioconductor-edger</requirement> <requirement type="package" version="1.4.30">r-statmod</requirement> <requirement type="package" version="0.5.0">r-scales</requirement> @@ -102,6 +102,10 @@ && cp '$outReport.files_path'/*.tsv output_dir/ + +#if $out.rscript: + && cp '$__tool_directory__/limma_voom.R' '$rscript' +#end if ]]></command> <inputs> @@ -138,12 +142,12 @@ </param> <repeat name="rep_group" title="Group" min="2" default="2"> <param name="groupName" type="text" label="Name" - help="Name of group that the counts files(s) belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive)."> + help="Name of group that the counts files belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive)."> <sanitizer> <valid initial="string.letters,string.digits"><add value="_" /></valid> </sanitizer> </param> - <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/> + <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts files"/> </repeat> </repeat> </when> @@ -245,6 +249,7 @@ label="Output Normalised Counts Table?" help="Output a file containing the normalised counts, these are in log2 counts per million (logCPM). Default: No"> </param> + <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/> <param name="rdaOption" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report. Default: No"> @@ -281,6 +286,9 @@ <collection name="outTables" type="list" label="${tool.name} on ${on_string}: Tables"> <discover_datasets pattern="(?P<name>.+)\.tsv$" format="tabular" directory="output_dir" visible="false" /> </collection> + <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript"> + <filter>out['rscript']</filter> + </data> </outputs> <tests> @@ -300,8 +308,18 @@ </repeat> <param name="normalisationOption" value="TMM" /> <output_collection name="outTables" count="2"> - <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" /> - <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut.tsv" /> + <element name="limma-voom_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*0.4573" /> + </assert_contents> + </element> + <element name="limma-voom_WT-Mut" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*-0.4573" /> + </assert_contents> + </element> </output_collection> <output name="outReport" > <assert_contents> @@ -309,7 +327,7 @@ <not_has_text text="RData" /> </assert_contents> </output> - </test> + </test> <!-- Ensure annotation file input works --> <test> <param name="format" value="matrix" /> @@ -325,12 +343,18 @@ </repeat> <param name="normalisationOption" value="TMM" /> <output_collection name="outTables" count="1"> - <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_anno.tsv" /> + <element name="limma-voom_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*Abca4.*0.4573" /> + </assert_contents> + </element> </output_collection> </test> - <!-- Ensure RData file can be output --> + <!-- Ensure Rscript and RData file can be output --> <test> <param name="format" value="matrix" /> + <param name="rscript" value="True"/> <param name="rdaOption" value="true" /> <param name="counts" value="matrix.txt" /> <repeat name="rep_factor"> @@ -346,6 +370,7 @@ <has_text text="RData" /> </assert_contents> </output> + <output name="rscript" value="out_rscript.txt"/> </test> <!-- Ensure secondary factors work --> <test> @@ -364,7 +389,12 @@ </repeat> <param name="normalisationOption" value="TMM" /> <output_collection name="outTables" count="1" > - <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_2fact.tsv" /> + <element name="limma-voom_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*0.4590" /> + </assert_contents> + </element> </output_collection> </test> <!-- Ensure factors file input works --> @@ -378,7 +408,12 @@ </repeat> <param name="normalisationOption" value="TMM" /> <output_collection name="outTables" count="1"> - <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_2fact.tsv" /> + <element name="limma-voom_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*0.4590" /> + </assert_contents> + </element> </output_collection> </test> <!-- Ensure normalised counts file output works--> @@ -395,8 +430,18 @@ </repeat> <param name="normalisationOption" value="TMM" /> <output_collection name="outTables" count="2"> - <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" /> - <element name="limma-voom_normcounts" ftype="tabular" file="limma-voom_normcounts.tsv" /> + <element name="limma-voom_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*0.4573" /> + </assert_contents> + </element> + <element name="limma-voom_normcounts" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*Mut1.*Mut2.*Mut3.*WT1.*WT2.*WT3" /> + <has_text_matching expression="11304.*15.7545" /> + </assert_contents> + </element> </output_collection> </test> <!-- Ensure multiple counts files input works --> @@ -438,9 +483,24 @@ </repeat> <param name="normCounts" value="true" /> <output_collection name="outTables" count="3"> - <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_2fact_anno.tsv" /> - <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut_2fact_anno.tsv" /> - <element name="limma-voom_normcounts" ftype="tabular" file="limma-voom_normcounts_anno.tsv" /> + <element name="limma-voom_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*Abca4.*0.4590" /> + </assert_contents> + </element> + <element name="limma-voom_WT-Mut" ftype="tabular" > + <assert_contents> + <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*Abca4.*-0.4590" /> + </assert_contents> + </element> + <element name="limma-voom_normcounts" ftype="tabular" > + <assert_contents> + <has_text_matching expression="EntrezID.*Symbol.*Mut1.*Mut2.*Mut3.*WT1.*WT2.*WT3" /> + <has_text_matching expression="11304.*Abca4.*15.7545" /> + </assert_contents> + </element> </output_collection> </test> <!-- Ensure limma-trend option works --> @@ -463,7 +523,42 @@ </assert_contents> </output> <output_collection name="outTables" count="1"> - <element name="limma-trend_Mut-WT" ftype="tabular" file="limma-trend_Mut-WT.tsv" /> + <element name="limma-trend_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*0.4540" /> + </assert_contents> + </element> + </output_collection> + </test> + <!-- Ensure limma-trend option with annotation works --> + <test> + <param name="format" value="matrix" /> + <param name="counts" value="matrix.txt" /> + <param name="annoOpt" value="yes" /> + <param name="geneanno" value="anno.txt" /> + <repeat name="rep_factor"> + <param name="factorName" value="Genotype"/> + <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT" /> + </repeat> + <repeat name="rep_contrast"> + <param name="contrast" value="Mut-WT" /> + </repeat> + <param name="normalisationOption" value="TMM" /> + <param name="de_select" value="trend" /> + <param name="rdaOption" value="true" /> + <output name="outReport" > + <assert_contents> + <has_text text="The limma-trend method was used" /> + </assert_contents> + </output> + <output_collection name="outTables" count="1"> + <element name="limma-trend_Mut-WT" ftype="tabular" > + <assert_contents> + <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" /> + <has_text_matching expression="11304.*0.4540" /> + </assert_contents> + </element> </output_collection> </test> </tests> @@ -488,7 +583,6 @@ ratio of the largest library size to the smallest is not more than about 3-fold. When the library sizes are quite variable between samples, then the voom approach is theoretically more powerful than limma-trend. For more information see the excellent `limma User's Guide`_. **Counts Data:** - The counts data can either be input as separate counts files (one sample per file) or a single count matrix (one sample per column). The rows correspond to genes, and columns correspond to the counts for the samples. Values must be tab separated, with the first row containing the sample/column labels and the first column containing the row/gene labels. Gene identifiers can be of any type but must be unique and not repeated within a counts file. Example - **Separate Count Files**: @@ -520,19 +614,19 @@ **Gene Annotations:** Optional input for gene annotations, this can contain more information about the genes than just an ID number. The annotations will -be available in the differential expression results table and the optional normalised counts table. +be available in the differential expression results table and the optional normalised counts table. The file must contain a header row and have the gene IDs in the first column. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish. Example: ========== ========== =================================================== **GeneID** **Symbol** **GeneName** ---------- ---------- --------------------------------------------------- - 1287 Pzp pregnancy zone protein - 1298 Aanat arylalkylamine N-acetyltransferase - 1302 Aatk apoptosis-associated tyrosine kinase - 1303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 - 1304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 - 1305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 + 11287 Pzp pregnancy zone protein + 11298 Aanat arylalkylamine N-acetyltransferase + 11302 Aatk apoptosis-associated tyrosine kinase + 11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 + 11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 + 11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 ========== ========== =================================================== **Factor Information:** @@ -556,24 +650,6 @@ *Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas. -**Gene Annotations:** -Optional input for gene annotations, this can contain more -information about the genes than just an ID number. The annotations will -be available in the differential expression results table and the optional normalised counts table. - -Example: - - ========== ========== =================================================== - **GeneID** **Symbol** **GeneName** - ---------- ---------- --------------------------------------------------- - 1287 Pzp pregnancy zone protein - 1298 Aanat arylalkylamine N-acetyltransferase - 1302 Aatk apoptosis-associated tyrosine kinase - 1303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 - 1304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 - 1305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 - ========== ========== =================================================== - **Contrasts of Interest:** The contrasts you wish to make between levels. A common contrast would be a simple difference between two levels: "Mut-WT" @@ -675,6 +751,7 @@ Optionally, under **Output Options** you can choose to output * a normalised counts table + * the R script used by this tool * an RData file -----
--- a/test-data/anno.txt Wed Jan 31 12:45:42 2018 -0500 +++ b/test-data/anno.txt Sun Apr 29 17:36:42 2018 -0400 @@ -1,7 +1,7 @@ EntrezID Symbol GeneName Chr Length +11302 Aatk apoptosis-associated tyrosine kinase 11 5743 11287 Pzp pregnancy zone protein 6 4681 -11298 Aanat arylalkylamine N-acetyltransferase 11 1455 -11302 Aatk apoptosis-associated tyrosine kinase 11 5743 11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 4 10260 11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 3 7248 -11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061 \ No newline at end of file +11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061 +11298 Aanat arylalkylamine N-acetyltransferase 11 1455
--- a/test-data/limma-trend_Mut-WT.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -"11304" 0.454006866294872 15.5347733798565 5.73783569533779 6.51380077190898e-06 3.90828046314539e-05 9.50615253604686 -"11287" 0.189211879829905 17.6560194022488 4.52571052740792 0.000138730279836171 0.000416190839508512 3.37443017677096 -"11298" -0.137906115120717 17.6760627799956 -3.28370943058883 0.00313397342215802 0.00626794684431604 -1.40612492217495 -"11303" -0.0565048762006306 17.887811868748 -1.22181580605621 0.233642901495507 0.35046435224326 -5.9848993971045 -"11305" -0.0597744562444475 18.1592280697576 -1.04836609029276 0.304913314924737 0.365895977909685 -6.17897539114193 -"11302" -0.0455235218343741 10.2501491061291 -0.373618198821297 0.711968519770644 0.711968519770644 -6.65188039014484
--- a/test-data/limma-voom_Mut-WT.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -"11304" 0.457332061341022 15.5254133001226 6.50459574633635 9.98720685007144e-07 5.99232411004286e-06 14.0741948485867 -"11287" 0.190749727701785 17.6546448244617 5.09535410066427 3.26518807653921e-05 9.79556422961764e-05 5.46773893802514 -"11298" -0.138014418336201 17.6747285193431 -3.3316848584234 0.00278753263633103 0.00557506527266206 -1.84301342041422 -"11303" -0.0558958943607024 17.886791401216 -1.30108531275582 0.205582481502277 0.254491025872966 -6.49241240578 -"11305" -0.0606991650996669 18.1585474109909 -1.28203791127301 0.212075854894138 0.254491025872966 -6.42090197700496 -"11302" -0.035023968220445 9.78883119065989 -0.236945963165271 0.814709535394086 0.814709535394086 -6.09497670655939
--- a/test-data/limma-voom_Mut-WT_2fact.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -"11287" 0.190521683937302 17.6546448244617 14.4538942795641 5.93483727833391e-09 3.56090236700035e-08 96.5864491179047 -"11298" -0.13987741320075 17.6747285193431 -7.7190169464239 5.41139783114199e-06 1.6234193493426e-05 22.3495333961476 -"11304" 0.459011254726059 15.5254133001226 5.6408319840902 0.00010884906092378 0.000217698121847559 8.76657226635715 -"11303" -0.0641599901594248 17.886791401216 -2.9900818153928 0.0112725655419901 0.0169088483129851 -2.70089035288878 -"11305" -0.0651479753016169 18.1585474109909 -2.28935282063866 0.0409794711446066 0.0491753653735279 -4.2713352660451 -"11302" -0.0358817134644287 9.78883119065989 -0.439436789626843 0.668154169055813 0.668154169055813 -5.75838315483931
--- a/test-data/limma-voom_Mut-WT_2fact_anno.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -11287 "Pzp" "pregnancy zone protein" 6 4681 0.190521683937302 17.6546448244617 15.1152647604854 3.5605743515492e-09 2.13634461092952e-08 106.319792304784 -11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.139877413200757 17.6747285193431 -8.49164953574146 2.03124167559177e-06 6.0937250267753e-06 28.5167891813121 -11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.459011254726057 15.5254133001226 5.60316118539212 0.000115556485215252 0.000231112970430504 8.56249981126828 -11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0641599901594141 17.886791401216 -2.92798536696768 0.0126512435710263 0.0189768653565394 -2.86386681629442 -11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0651479753016169 18.1585474109909 -2.03911882030581 0.0640903140266962 0.0769083768320354 -4.69855556350315 -11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0358817134644287 9.78883119065989 -0.43940476739557 0.668176726641997 0.668176726641997 -5.75841999159995
--- a/test-data/limma-voom_Mut-WT_anno.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.457332061341022 15.5254133001226 6.50459574633635 9.98720685007144e-07 5.99232411004286e-06 14.0741948485867 -11287 "Pzp" "pregnancy zone protein" 6 4681 0.190749727701785 17.6546448244617 5.09535410066427 3.26518807653921e-05 9.79556422961764e-05 5.46773893802514 -11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.138014418336201 17.6747285193431 -3.3316848584234 0.00278753263633103 0.00557506527266206 -1.84301342041422 -11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0558958943607024 17.886791401216 -1.30108531275582 0.205582481502277 0.254491025872966 -6.49241240578 -11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0606991650996669 18.1585474109909 -1.28203791127301 0.212075854894138 0.254491025872966 -6.42090197700496 -11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.035023968220445 9.78883119065989 -0.236945963165271 0.814709535394086 0.814709535394086 -6.09497670655939
--- a/test-data/limma-voom_WT-Mut.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -"11304" -0.457332061341022 15.5254133001226 -6.50459574633635 9.98720685007144e-07 5.99232411004286e-06 14.0741948485867 -"11287" -0.190749727701785 17.6546448244617 -5.09535410066427 3.26518807653921e-05 9.79556422961764e-05 5.46773893802514 -"11298" 0.138014418336201 17.6747285193431 3.3316848584234 0.00278753263633103 0.00557506527266206 -1.84301342041422 -"11303" 0.0558958943607024 17.886791401216 1.30108531275582 0.205582481502277 0.254491025872966 -6.49241240578 -"11305" 0.0606991650996669 18.1585474109909 1.28203791127301 0.212075854894138 0.254491025872966 -6.42090197700496 -"11302" 0.035023968220445 9.78883119065989 0.236945963165271 0.814709535394086 0.814709535394086 -6.09497670655939
--- a/test-data/limma-voom_WT-Mut_2fact_anno.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B" -11287 "Pzp" "pregnancy zone protein" 6 4681 -0.190521683937302 17.6546448244617 -15.1152647604854 3.5605743515492e-09 2.13634461092952e-08 106.319792304784 -11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 0.139877413200757 17.6747285193431 8.49164953574146 2.03124167559177e-06 6.0937250267753e-06 28.5167891813121 -11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 -0.459011254726057 15.5254133001226 -5.60316118539212 0.000115556485215252 0.000231112970430504 8.56249981126828 -11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 0.0641599901594141 17.886791401216 2.92798536696768 0.0126512435710263 0.0189768653565394 -2.86386681629442 -11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 0.0651479753016169 18.1585474109909 2.03911882030581 0.0640903140266962 0.0769083768320354 -4.69855556350315 -11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 0.0358817134644287 9.78883119065989 0.43940476739557 0.668176726641997 0.668176726641997 -5.75841999159995
--- a/test-data/limma-voom_normcounts.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"GeneID" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3" -"11287" 17.7719335903598 17.7105244453357 17.7658460810258 17.6076545522668 17.5079905058358 17.5639197719462 -"11298" 17.6506532355915 17.5520012519588 17.6144324004081 17.7727137985373 17.6986875511432 17.7598828784201 -"11302" 9.71615817858834 9.91760904198188 9.67886550454371 9.57719962386619 10.0175525101992 9.82560228478005 -"11303" 17.8774046020864 17.7865502833631 17.911613462219 17.9125899785601 17.8773613214092 17.955228759658 -"11304" 15.7545783969022 15.8519803740125 15.6561454280436 15.354518172169 15.2178020484983 15.3174553811097 -"11305" 18.0401341021246 18.1408682071359 18.204920085011 18.1808259688524 18.1818679259847 18.2026681768366
--- a/test-data/limma-voom_normcounts_anno.tsv Wed Jan 31 12:45:42 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -"EntrezID" "Symbol" "GeneName" "Chr" "Length" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3" -11287 "Pzp" "pregnancy zone protein" 6 4681 17.7719335903598 17.7105244453357 17.7658460810258 17.6076545522668 17.5079905058358 17.5639197719462 -11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 17.6506532355915 17.5520012519588 17.6144324004081 17.7727137985373 17.6986875511432 17.7598828784201 -11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 9.71615817858834 9.91760904198188 9.67886550454371 9.57719962386619 10.0175525101992 9.82560228478005 -11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 17.8774046020864 17.7865502833631 17.911613462219 17.9125899785601 17.8773613214092 17.955228759658 -11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 15.7545783969022 15.8519803740125 15.6561454280436 15.354518172169 15.2178020484983 15.3174553811097 -11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 18.0401341021246 18.1408682071359 18.204920085011 18.1808259688524 18.1818679259847 18.2026681768366
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_rscript.txt Sun Apr 29 17:36:42 2018 -0400 @@ -0,0 +1,869 @@ +# This tool takes in a matrix of feature counts as well as gene annotations and +# outputs a table of top expressions as well as various plots for differential +# expression analysis +# +# ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs +# outPath", "o", 1, "character" -Path to folder to write all output to +# filesPath", "j", 2, "character" -JSON list object if multiple files input +# matrixPath", "m", 2, "character" -Path to count matrix +# factFile", "f", 2, "character" -Path to factor information file +# factInput", "i", 2, "character" -String containing factors if manually input +# annoPath", "a", 2, "character" -Path to input containing gene annotations +# contrastData", "C", 1, "character" -String containing contrasts of interest +# cpmReq", "c", 2, "double" -Float specifying cpm requirement +# cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement +# sampleReq", "s", 2, "integer" -Integer specifying cpm requirement +# normCounts", "x", 0, "logical" -String specifying if normalised counts should be output +# rdaOpt", "r", 0, "logical" -String specifying if RData should be output +# lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement +# pValReq", "p", 1, "double" -Float specifying the p-value requirement +# pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method +# normOpt", "n", 1, "character" -String specifying type of normalisation used +# robOpt", "b", 0, "logical" -String specifying if robust options should be used +# trend", "t", 1, "double" -Float for prior.count if limma-trend is used instead of voom +# weightOpt", "w", 0, "logical" -String specifying if voomWithQualityWeights should be used +# +# OUT: +# MDS Plot +# Voom/SA plot +# MD Plot +# Expression Table +# HTML file linking to the ouputs +# Optional: +# Normalised counts Table +# RData file +# +# +# Author: Shian Su - registertonysu@gmail.com - Jan 2014 +# Modified by: Maria Doyle - Jun 2017, Jan 2018 + +# Record starting time +timeStart <- as.character(Sys.time()) + +# Load all required libraries +library(methods, quietly=TRUE, warn.conflicts=FALSE) +library(statmod, quietly=TRUE, warn.conflicts=FALSE) +library(splines, quietly=TRUE, warn.conflicts=FALSE) +library(edgeR, quietly=TRUE, warn.conflicts=FALSE) +library(limma, quietly=TRUE, warn.conflicts=FALSE) +library(scales, quietly=TRUE, warn.conflicts=FALSE) +library(getopt, quietly=TRUE, warn.conflicts=FALSE) + +if (packageVersion("limma") < "3.20.1") { + stop("Please update 'limma' to version >= 3.20.1 to run this tool") +} + +################################################################################ +### Function Delcaration +################################################################################ +# Function to sanitise contrast equations so there are no whitespaces +# surrounding the arithmetic operators, leading or trailing whitespace +sanitiseEquation <- function(equation) { + equation <- gsub(" *[+] *", "+", equation) + equation <- gsub(" *[-] *", "-", equation) + equation <- gsub(" *[/] *", "/", equation) + equation <- gsub(" *[*] *", "*", equation) + equation <- gsub("^\\s+|\\s+$", "", equation) + return(equation) +} + +# Function to sanitise group information +sanitiseGroups <- function(string) { + string <- gsub(" *[,] *", ",", string) + string <- gsub("^\\s+|\\s+$", "", string) + return(string) +} + +# Function to change periods to whitespace in a string +unmake.names <- function(string) { + string <- gsub(".", " ", string, fixed=TRUE) + return(string) +} + +# Generate output folder and paths +makeOut <- function(filename) { + return(paste0(opt$outPath, "/", filename)) +} + +# Generating design information +pasteListName <- function(string) { + return(paste0("factors$", string)) +} + +# Create cata function: default path set, default seperator empty and appending +# true by default (Ripped straight from the cat function with altered argument +# defaults) +cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL, + append = TRUE) { + if (is.character(file)) + if (file == "") + file <- stdout() + else if (substring(file, 1L, 1L) == "|") { + file <- pipe(substring(file, 2L), "w") + on.exit(close(file)) + } + else { + file <- file(file, ifelse(append, "a", "w")) + on.exit(close(file)) + } + .Internal(cat(list(...), file, sep, fill, labels, append)) +} + +# Function to write code for html head and title +HtmlHead <- function(title) { + cata("<head>\n") + cata("<title>", title, "</title>\n") + cata("</head>\n") +} + +# Function to write code for html links +HtmlLink <- function(address, label=address) { + cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") +} + +# Function to write code for html images +HtmlImage <- function(source, label=source, height=600, width=600) { + cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) + cata("\" width=\"", width, "\"/>\n") +} + +# Function to write code for html list items +ListItem <- function(...) { + cata("<li>", ..., "</li>\n") +} + +TableItem <- function(...) { + cata("<td>", ..., "</td>\n") +} + +TableHeadItem <- function(...) { + cata("<th>", ..., "</th>\n") +} + +################################################################################ +### Input Processing +################################################################################ + +# Collect arguments from command line +args <- commandArgs(trailingOnly=TRUE) + +# Get options, using the spec as defined by the enclosed list. +# Read the options from the default: commandArgs(TRUE). +spec <- matrix(c( + "htmlPath", "R", 1, "character", + "outPath", "o", 1, "character", + "filesPath", "j", 2, "character", + "matrixPath", "m", 2, "character", + "factFile", "f", 2, "character", + "factInput", "i", 2, "character", + "annoPath", "a", 2, "character", + "contrastData", "C", 1, "character", + "cpmReq", "c", 1, "double", + "totReq", "y", 0, "logical", + "cntReq", "z", 1, "integer", + "sampleReq", "s", 1, "integer", + "normCounts", "x", 0, "logical", + "rdaOpt", "r", 0, "logical", + "lfcReq", "l", 1, "double", + "pValReq", "p", 1, "double", + "pAdjOpt", "d", 1, "character", + "normOpt", "n", 1, "character", + "robOpt", "b", 0, "logical", + "trend", "t", 1, "double", + "weightOpt", "w", 0, "logical"), + byrow=TRUE, ncol=4) +opt <- getopt(spec) + + +if (is.null(opt$matrixPath) & is.null(opt$filesPath)) { + cat("A counts matrix (or a set of counts files) is required.\n") + q(status=1) +} + +if (is.null(opt$cpmReq)) { + filtCPM <- FALSE +} else { + filtCPM <- TRUE +} + +if (is.null(opt$cntReq) || is.null(opt$sampleReq)) { + filtSmpCount <- FALSE +} else { + filtSmpCount <- TRUE +} + +if (is.null(opt$totReq)) { + filtTotCount <- FALSE +} else { + filtTotCount <- TRUE +} + +if (is.null(opt$rdaOpt)) { + wantRda <- FALSE +} else { + wantRda <- TRUE +} + +if (is.null(opt$annoPath)) { + haveAnno <- FALSE +} else { + haveAnno <- TRUE +} + +if (is.null(opt$normCounts)) { + wantNorm <- FALSE +} else { + wantNorm <- TRUE +} + +if (is.null(opt$robOpt)) { + wantRobust <- FALSE +} else { + wantRobust <- TRUE +} + +if (is.null(opt$weightOpt)) { + wantWeight <- FALSE +} else { + wantWeight <- TRUE +} + +if (is.null(opt$trend)) { + wantTrend <- FALSE + deMethod <- "limma-voom" +} else { + wantTrend <- TRUE + deMethod <- "limma-trend" + priorCount <- opt$trend +} + + +if (!is.null(opt$filesPath)) { + # Process the separate count files (adapted from DESeq2 wrapper) + library("rjson") + parser <- newJSONParser() + parser$addData(opt$filesPath) + factorList <- parser$getObject() + factors <- sapply(factorList, function(x) x[[1]]) + filenamesIn <- unname(unlist(factorList[[1]][[2]])) + sampleTable <- data.frame(sample=basename(filenamesIn), + filename=filenamesIn, + row.names=filenamesIn, + stringsAsFactors=FALSE) + for (factor in factorList) { + factorName <- factor[[1]] + sampleTable[[factorName]] <- character(nrow(sampleTable)) + lvls <- sapply(factor[[2]], function(x) names(x)) + for (i in seq_along(factor[[2]])) { + files <- factor[[2]][[i]][[1]] + sampleTable[files,factorName] <- lvls[i] + } + sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) + } + rownames(sampleTable) <- sampleTable$sample + rem <- c("sample","filename") + factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE] + + #read in count files and create single table + countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) + counts <- do.call("cbind", countfiles) + +} else { + # Process the single count matrix + counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) + row.names(counts) <- counts[, 1] + counts <- counts[ , -1] + countsRows <- nrow(counts) + + # Process factors + if (is.null(opt$factInput)) { + factorData <- read.table(opt$factFile, header=TRUE, sep="\t") + factors <- factorData[, -1, drop=FALSE] + } else { + factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) + factorData <- list() + for (fact in factors) { + newFact <- unlist(strsplit(fact, split="::")) + factorData <- rbind(factorData, newFact) + } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. + + # Set the row names to be the name of the factor and delete first row + row.names(factorData) <- factorData[, 1] + factorData <- factorData[, -1] + factorData <- sapply(factorData, sanitiseGroups) + factorData <- sapply(factorData, strsplit, split=",") + factorData <- sapply(factorData, make.names) + # Transform factor data into data frame of R factor objects + factors <- data.frame(factorData) + } +} + + # if annotation file provided +if (haveAnno) { + geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) +} + +#Create output directory +dir.create(opt$outPath, showWarnings=FALSE) + +# Split up contrasts seperated by comma into a vector then sanitise +contrastData <- unlist(strsplit(opt$contrastData, split=",")) +contrastData <- sanitiseEquation(contrastData) +contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) + + +mdsOutPdf <- makeOut("mdsplot_nonorm.pdf") +mdsOutPng <- makeOut("mdsplot_nonorm.png") +nmdsOutPdf <- makeOut("mdsplot.pdf") +nmdsOutPng <- makeOut("mdsplot.png") +maOutPdf <- character() # Initialise character vector +maOutPng <- character() +topOut <- character() +for (i in 1:length(contrastData)) { + maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf")) + maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png")) + topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv")) +} +normOut <- makeOut(paste0(deMethod, "_normcounts.tsv")) +rdaOut <- makeOut(paste0(deMethod, "_analysis.RData")) +sessionOut <- makeOut("session_info.txt") + +# Initialise data for html links and images, data frame with columns Label and +# Link +linkData <- data.frame(Label=character(), Link=character(), + stringsAsFactors=FALSE) +imageData <- data.frame(Label=character(), Link=character(), + stringsAsFactors=FALSE) + +# Initialise vectors for storage of up/down/neutral regulated counts +upCount <- numeric() +downCount <- numeric() +flatCount <- numeric() + +################################################################################ +### Data Processing +################################################################################ + +# Extract counts and annotation data +print("Extracting counts") +data <- list() +data$counts <- counts +if (haveAnno) { + # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) + annoord <- geneanno[match(row.names(counts), geneanno[,1]), ] + data$genes <- annoord +} else { + data$genes <- data.frame(GeneID=row.names(counts)) +} + +# If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of +# samples. Default is no filtering +preFilterCount <- nrow(data$counts) + +if (filtCPM || filtSmpCount || filtTotCount) { + + if (filtTotCount) { + keep <- rowSums(data$counts) >= opt$cntReq + } else if (filtSmpCount) { + keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq + } else if (filtCPM) { + keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq + } + + data$counts <- data$counts[keep, ] + data$genes <- data$genes[keep, , drop=FALSE] +} + +postFilterCount <- nrow(data$counts) +filteredCount <- preFilterCount-postFilterCount + +# Creating naming data +samplenames <- colnames(data$counts) +sampleanno <- data.frame("sampleID"=samplenames, factors) + + +# Generating the DGEList object "data" +print("Generating DGEList object") +data$samples <- sampleanno +data$samples$lib.size <- colSums(data$counts) +data$samples$norm.factors <- 1 +row.names(data$samples) <- colnames(data$counts) +data <- new("DGEList", data) + +print("Generating Design") +# Name rows of factors according to their sample +row.names(factors) <- names(data$counts) +factorList <- sapply(names(factors), pasteListName) +formula <- "~0" +for (i in 1:length(factorList)) { + formula <- paste(formula,factorList[i], sep="+") +} +formula <- formula(formula) +design <- model.matrix(formula) +for (i in 1:length(factorList)) { + colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) +} + +# Calculating normalising factors +print("Calculating Normalisation Factors") +data <- calcNormFactors(data, method=opt$normOpt) + +# Generate contrasts information +print("Generating Contrasts") +contrasts <- makeContrasts(contrasts=contrastData, levels=design) + +################################################################################ +### Data Output +################################################################################ +# Plot MDS +print("Generating MDS plot") +labels <- names(counts) +png(mdsOutPng, width=600, height=600) +# Currently only using a single factor +plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)") +imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png") +invisible(dev.off()) + +pdf(mdsOutPdf) +plotMDS(data, labels=labels, cex=0.5) +linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf") +invisible(dev.off()) + +if (wantTrend) { + # limma-trend approach + logCPM <- cpm(data, log=TRUE, prior.count=opt$trend) + fit <- lmFit(logCPM, design) + fit$genes <- data$genes + fit <- contrasts.fit(fit, contrasts) + if (wantRobust) { + fit <- eBayes(fit, trend=TRUE, robust=TRUE) + } else { + fit <- eBayes(fit, trend=TRUE, robust=FALSE) + } + # plot fit with plotSA + saOutPng <- makeOut("saplot.png") + saOutPdf <- makeOut("saplot.pdf") + + png(saOutPng, width=600, height=600) + plotSA(fit, main="SA Plot") + imgName <- "SA Plot.png" + imgAddr <- "saplot.png" + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) + + pdf(saOutPdf, width=14) + plotSA(fit, main="SA Plot") + linkName <- paste0("SA Plot.pdf") + linkAddr <- paste0("saplot.pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + + plotData <- logCPM + + # Save normalised counts (log2cpm) + if (wantNorm) { + write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE) + linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv")))) + } +} else { + # limma-voom approach + voomOutPdf <- makeOut("voomplot.pdf") + voomOutPng <- makeOut("voomplot.png") + + if (wantWeight) { + # Creating voom data object and plot + png(voomOutPng, width=1000, height=600) + vData <- voomWithQualityWeights(data, design=design, plot=TRUE) + imgName <- "Voom Plot.png" + imgAddr <- "voomplot.png" + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) + + pdf(voomOutPdf, width=14) + vData <- voomWithQualityWeights(data, design=design, plot=TRUE) + linkName <- paste0("Voom Plot.pdf") + linkAddr <- paste0("voomplot.pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + + # Generating fit data and top table with weights + wts <- vData$weights + voomFit <- lmFit(vData, design, weights=wts) + + } else { + # Creating voom data object and plot + png(voomOutPng, width=600, height=600) + vData <- voom(data, design=design, plot=TRUE) + imgName <- "Voom Plot" + imgAddr <- "voomplot.png" + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) + + pdf(voomOutPdf) + vData <- voom(data, design=design, plot=TRUE) + linkName <- paste0("Voom Plot.pdf") + linkAddr <- paste0("voomplot.pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + + # Generate voom fit + voomFit <- lmFit(vData, design) + } + + # Save normalised counts (log2cpm) + if (wantNorm) { + norm_counts <- data.frame(vData$genes, vData$E) + write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) + linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv")))) + } + + # Fit linear model and estimate dispersion with eBayes + voomFit <- contrasts.fit(voomFit, contrasts) + if (wantRobust) { + fit <- eBayes(voomFit, robust=TRUE) + } else { + fit <- eBayes(voomFit, robust=FALSE) + } + plotData <- vData +} + +print("Generating normalised MDS plot") +png(nmdsOutPng, width=600, height=600) +# Currently only using a single factor +plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)") +imgName <- "MDS Plot (normalised)" +imgAddr <- "mdsplot.png" +imageData <- rbind(imageData, c(imgName, imgAddr)) +invisible(dev.off()) + +pdf(nmdsOutPdf) +plotMDS(plotData, labels=labels, cex=0.5) +linkName <- paste0("MDS Plot (normalised).pdf") +linkAddr <- paste0("mdsplot.pdf") +linkData <- rbind(linkData, c(linkName, linkAddr)) +invisible(dev.off()) + + +print("Generating DE results") +status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, + lfc=opt$lfcReq) +sumStatus <- summary(status) + +for (i in 1:length(contrastData)) { + # Collect counts for differential expression + upCount[i] <- sumStatus["Up", i] + downCount[i] <- sumStatus["Down", i] + flatCount[i] <- sumStatus["NotSig", i] + + # Write top expressions table + top <- topTable(fit, coef=i, number=Inf, sort.by="P") + write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE) + + linkName <- paste0(deMethod, "_", contrastData[i], ".tsv") + linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv") + linkData <- rbind(linkData, c(linkName, linkAddr)) + + # Plot MA (log ratios vs mean average) using limma package on weighted + pdf(maOutPdf[i]) + limma::plotMD(fit, status=status, coef=i, + main=paste("MA Plot:", unmake.names(contrastData[i])), + col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + xlab="Average Expression", ylab="logFC") + + abline(h=0, col="grey", lty=2) + + linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)") + linkAddr <- paste0("maplot_", contrastData[i], ".pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + + png(maOutPng[i], height=600, width=600) + limma::plotMD(fit, status=status, coef=i, + main=paste("MA Plot:", unmake.names(contrastData[i])), + col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + xlab="Average Expression", ylab="logFC") + + abline(h=0, col="grey", lty=2) + + imgName <- paste0("MA Plot_", contrastData[i]) + imgAddr <- paste0("maplot_", contrastData[i], ".png") + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) +} +sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) +row.names(sigDiff) <- contrastData + +# Save relevant items as rda object +if (wantRda) { + print("Saving RData") + if (wantWeight) { + save(data, status, plotData, labels, factors, wts, fit, top, contrasts, + design, + file=rdaOut, ascii=TRUE) + } else { + save(data, status, plotData, labels, factors, fit, top, contrasts, design, + file=rdaOut, ascii=TRUE) + } + linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData")))) +} + +# Record session info +writeLines(capture.output(sessionInfo()), sessionOut) +linkData <- rbind(linkData, c("Session Info", "session_info.txt")) + +# Record ending time and calculate total run time +timeEnd <- as.character(Sys.time()) +timeTaken <- capture.output(round(difftime(timeEnd,timeStart), digits=3)) +timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE) +################################################################################ +### HTML Generation +################################################################################ + +# Clear file +cat("", file=opt$htmlPath) + +cata("<html>\n") + +cata("<body>\n") +cata("<h3>Limma Analysis Output:</h3>\n") +cata("Links to PDF copies of plots are in 'Plots' section below />\n") +if (wantWeight) { + HtmlImage(imageData$Link[1], imageData$Label[1], width=1000) +} else { + HtmlImage(imageData$Link[1], imageData$Label[1]) +} + +for (i in 2:nrow(imageData)) { + HtmlImage(imageData$Link[i], imageData$Label[i]) +} + +cata("<h4>Differential Expression Counts:</h4>\n") + +cata("<table border=\"1\" cellpadding=\"4\">\n") +cata("<tr>\n") +TableItem() +for (i in colnames(sigDiff)) { + TableHeadItem(i) +} +cata("</tr>\n") +for (i in 1:nrow(sigDiff)) { + cata("<tr>\n") + TableHeadItem(unmake.names(row.names(sigDiff)[i])) + for (j in 1:ncol(sigDiff)) { + TableItem(as.character(sigDiff[i, j])) + } + cata("</tr>\n") +} +cata("</table>") + +cata("<h4>Plots:</h4>\n") +for (i in 1:nrow(linkData)) { + if (grepl(".pdf", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + +cata("<h4>Tables:</h4>\n") +for (i in 1:nrow(linkData)) { + if (grepl(".tsv", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + +if (wantRda) { + cata("<h4>R Data Object:</h4>\n") + for (i in 1:nrow(linkData)) { + if (grepl(".RData", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } + } +} + +cata("<p>Alt-click links to download file.</p>\n") +cata("<p>Click floppy disc icon associated history item to download ") +cata("all files.</p>\n") +cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") + +cata("<h4>Additional Information</h4>\n") +cata("<ul>\n") + +if (filtCPM || filtSmpCount || filtTotCount) { + if (filtCPM) { + tempStr <- paste("Genes without more than", opt$cmpReq, + "CPM in at least", opt$sampleReq, "samples are insignificant", + "and filtered out.") + } else if (filtSmpCount) { + tempStr <- paste("Genes without more than", opt$cntReq, + "counts in at least", opt$sampleReq, "samples are insignificant", + "and filtered out.") + } else if (filtTotCount) { + tempStr <- paste("Genes without more than", opt$cntReq, + "counts, after summing counts for all samples, are insignificant", + "and filtered out.") + } + + ListItem(tempStr) + filterProp <- round(filteredCount/preFilterCount*100, digits=2) + tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, + "%) genes were filtered out for low expression.") + ListItem(tempStr) +} +ListItem(opt$normOpt, " was the method used to normalise library sizes.") +if (wantTrend) { + ListItem("The limma-trend method was used.") +} else { + ListItem("The limma-voom method was used.") +} +if (wantWeight) { + ListItem("Weights were applied to samples.") +} else { + ListItem("Weights were not applied to samples.") +} +if (wantRobust) { + ListItem("eBayes was used with robust settings (robust=TRUE).") +} +if (opt$pAdjOpt!="none") { + if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") { + tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ", + "of ", opt$pValReq," and exhibit log2-fold-change of at ", + "least ", opt$lfcReq, ".") + ListItem(tempStr) + } else if (opt$pAdjOpt=="holm") { + tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ", + "p-value of ", opt$pValReq," by the Holm(1979) ", + "method, and exhibit log2-fold-change of at least ", + opt$lfcReq, ".") + ListItem(tempStr) + } + } else { + tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ", + "of ", opt$pValReq," and exhibit log2-fold-change of at ", + "least ", opt$lfcReq, ".") + ListItem(tempStr) +} +cata("</ul>\n") + +cata("<h4>Summary of experimental data:</h4>\n") + +cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n") + +cata("<table border=\"1\" cellpadding=\"3\">\n") +cata("<tr>\n") +TableHeadItem("SampleID") +TableHeadItem(names(factors)[1]," (Primary Factor)") + +if (ncol(factors) > 1) { + for (i in names(factors)[2:length(names(factors))]) { + TableHeadItem(i) + } + cata("</tr>\n") +} + +for (i in 1:nrow(factors)) { + cata("<tr>\n") + TableHeadItem(row.names(factors)[i]) + for (j in 1:ncol(factors)) { + TableItem(as.character(unmake.names(factors[i, j]))) + } + cata("</tr>\n") +} +cata("</table>") + +cit <- character() +link <- character() +link[1] <- paste0("<a href=\"", + "http://www.bioconductor.org/packages/release/bioc/", + "vignettes/limma/inst/doc/usersguide.pdf", + "\">", "limma User's Guide", "</a>.") + +link[2] <- paste0("<a href=\"", + "http://www.bioconductor.org/packages/release/bioc/", + "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf", + "\">", "edgeR User's Guide", "</a>") + +cit[1] <- paste("Please cite the following paper for this tool:") + +cit[2] <- paste("Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME,", + "Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? ", + "Modelling sample and observational level variability improves power ", + "in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.") + +cit[3] <- paste("Please cite the paper below for the limma software itself.", + "Please also try to cite the appropriate methodology articles", + "that describe the statistical methods implemented in limma,", + "depending on which limma functions you are using. The", + "methodology articles are listed in Section 2.1 of the", + link[1], + "Cite no. 3 only if sample weights were used.") +cit[4] <- paste("Smyth GK (2005). Limma: linear models for microarray data.", + "In: 'Bioinformatics and Computational Biology Solutions using", + "R and Bioconductor'. R. Gentleman, V. Carey, S. doit,.", + "Irizarry, W. Huber (eds), Springer, New York, pages 397-420.") +cit[5] <- paste("Please cite the first paper for the software itself and the", + "other papers for the various original statistical methods", + "implemented in edgeR. See Section 1.2 in the", link[2], + "for more detail.") +cit[6] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a", + "Bioconductor package for differential expression analysis", + "of digital gene expression data. Bioinformatics 26, 139-140") +cit[7] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests", + "for assessing differences in tag abundance. Bioinformatics", + "23, 2881-2887") +cit[8] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of", + "negative binomial dispersion, with applications to SAGE data.", + "Biostatistics, 9, 321-332") +cit[9] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential", + "expression analysis of multifactor RNA-Seq experiments with", + "respect to biological variation. Nucleic Acids Research 40,", + "4288-4297") +cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:", + "precision weights unlock linear model analysis tools for", + "RNA-seq read counts. Genome Biology 15, R29.") +cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,", + "Dobrovic A, Holloway A and Smyth GK (2006).", + "Empirical array quality weights for microarray data.", + "BMC Bioinformatics 7, Article 261.") +cata("<h3>Citations</h3>\n") +cata(cit[1], "\n") +cata("<br>\n") +cata(cit[2], "\n") + +cata("<h4>limma</h4>\n") +cata(cit[3], "\n") +cata("<ol>\n") +ListItem(cit[4]) +ListItem(cit[10]) +ListItem(cit[11]) +cata("</ol>\n") + +cata("<h4>edgeR</h4>\n") +cata(cit[5], "\n") +cata("<ol>\n") +ListItem(cit[6]) +ListItem(cit[7]) +ListItem(cit[8]) +ListItem(cit[9]) +cata("</ol>\n") + +cata("<p>Please report problems or suggestions to: su.s@wehi.edu.au</p>\n") + +for (i in 1:nrow(linkData)) { + if (grepl("session_info", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + +cata("<table border=\"0\">\n") +cata("<tr>\n") +TableItem("Task started at:"); TableItem(timeStart) +cata("</tr>\n") +cata("<tr>\n") +TableItem("Task ended at:"); TableItem(timeEnd) +cata("</tr>\n") +cata("<tr>\n") +TableItem("Task run time:"); TableItem(timeTaken) +cata("<tr>\n") +cata("</table>\n") + +cata("</body>\n") +cata("</html>")