Repository 'shrnaseq'
hg clone https://toolshed.g2.bx.psu.edu/repos/shians/shrnaseq

Changeset 6:3d04308a99f9 (2014-04-11)
Previous changeset 5:17befe9f8b03 (2014-02-24) | Next changeset 7:91e411fcdecc (2014-04-23)
Commit message:
- Added differentially expressed hairpin count output
- Added running time output
- Added counts table output
modified:
hairpinTool.R
hairpinTool.xml
b
diff -r 17befe9f8b03 -r 3d04308a99f9 hairpinTool.R
--- a/hairpinTool.R Mon Feb 24 14:50:08 2014 +1100
+++ b/hairpinTool.R Fri Apr 11 17:17:15 2014 +1000
[
b'@@ -1,3 +1,51 @@\n+# ARGS: 1.inputType         -String specifying format of input (fastq or table)\n+#    IF inputType is "fastQ":\n+#       2*.fastqPath        -One or more strings specifying path to fastq files\n+#       2.annoPath        -String specifying path to hairpin annotation table\n+#       3.samplePath        -String specifying path to sample annotation table\n+#       4.barStart          -Integer specifying starting position of barcode\n+#       5.barEnd            -Integer specifying ending position of barcode\n+#       6.hpStart           -Integer specifying startins position of hairpin\n+#                            unique region\n+#       7.hpEnd             -Integer specifying ending position of hairpin\n+#                            unique region\n+#    ###   \n+#    IF inputType is "counts":\n+#       2.countPath         -String specifying path to count table\n+#       3.annoPath          -String specifying path to hairpin annotation table\n+#       4.samplePath        -String specifying path to sample annotation table\n+#    ###\n+#       8.cpmReq            -Float specifying cpm requirement\n+#       9.sampleReq         -Integer specifying cpm requirement\n+#       10.fdrThresh        -Float specifying the FDR requirement\n+#       11.lfcThresh        -Float specifying the log-fold-change requirement\n+#       12.workMode         -String specifying exact test or GLM usage\n+#       13.htmlPath         -String specifying path to HTML file\n+#       14.folderPath       -STring specifying path to folder for output\n+#    IF workMode is "classic" (exact test)\n+#       15.pairData[2]      -String specifying first group for exact test\n+#       16.pairData[1]      -String specifying second group for exact test\n+#    ###\n+#    IF workMode is "glm"\n+#       15.contrastData     -String specifying contrasts to be made\n+#       16.roastOpt         -String specifying usage of gene-wise tests\n+#       17.hairpinReq       -String specifying hairpin 
requirement for gene-\n+#                            wise test\n+#       18.selectOpt        -String specifying type of selection for barcode\n+#                            plots\n+#       19.selectVals       -String specifying members selected for barcode\n+#                            plots\n+#\n+# OUT:  Bar Plot of Counts Per Index\n+#       Bar Plot of Counts Per Hairpin\n+#       MDS Plot\n+#       Smear Plot\n+#       Barcode Plots (If Genewise testing was selected)\n+#       Top Expression Table\n+#       HTML file linking to the ouputs\n+#\n+# Author: Shian Su - registertonysu@gmail.com - Jan 2014\n+\n # Record starting time\n timeStart <- as.character(Sys.time())\n \n@@ -115,7 +163,7 @@\n   fastqPath <- as.character(gsub("fastq::", "", argv[grepl("fastq::", argv)], \n                                  fixed=TRUE))\n   argv <- argv[!grepl("fastq::", argv, fixed=TRUE)]\n-  hairpinPath <- as.character(argv[2])\n+  annoPath <- as.character(argv[2])\n   samplePath <- as.character(argv[3])\n   barStart <- as.numeric(argv[4])\n   barEnd <- as.numeric(argv[5])\n@@ -134,6 +182,7 @@\n workMode <- as.character(argv[12])\n htmlPath <- as.character(argv[13])\n folderPath <- as.character(argv[14])\n+\n if (workMode=="classic") {\n   pairData <- character()\n   pairData[2] <- as.character(argv[15])\n@@ -147,14 +196,13 @@\n }\n \n # Read in inputs\n-if (inputType=="fastq") {\n-  samples <- read.table(samplePath, header=TRUE, sep="\\t")\n-  hairpins <- read.table(hairpinPath, header=TRUE, sep="\\t")\n-} else if (inputType=="counts") {\n-  samples <- read.table(samplePath, header=TRUE, sep="\\t")\n+\n+samples <- read.table(samplePath, header=TRUE, sep="\\t")\n+anno <- read.table(annoPath, header=TRUE, sep="\\t")\n+if (inputType=="counts") {\n   counts <- read.table(countPath, header=TRUE, sep="\\t")\n-  anno <- read.table(annoPath, header=TRUE, sep="\\t")\n }\n+\n ###################### Check inputs for correctness ############################\n samples$ID <- 
make.names(samples$ID)\n \n@@ -166,16 +214,16 @@\n   tab <- table(samples$ID)\n   offenders <- paste(names(tab[tab>1'..b'[3], "<br/>\\n")\n+  cata(hpReadout[3], "<br />\\n")\n   cata("<ul>\\n")\n   ListItem(hpReadout[4])\n   ListItem(hpReadout[7])\n   cata("</ul>\\n")\n-  cata(hpReadout[8:11], sep="<br/>\\n")\n+  cata(hpReadout[8:11], sep="<br />\\n")\n   cata("<br />\\n")\n   cata("<b>Please check that read percentages are consistent with ")\n   cata("expectations.</b><br >\\n")\n@@ -582,7 +653,7 @@\n \n cata("<h4>Output:</h4>\\n")\n cata("All images displayed have PDF copy at the bottom of the page, these can ")\n-cata("exported in a pdf viewer to high resolution image format. <br/>\\n")\n+cata("exported in a pdf viewer to high resolution image format. <br />\\n")\n for (i in 1:nrow(imageData)) {\n   if (grepl("barcode", imageData$Link[i])) {\n     if (packageVersion("limma")<"3.19.19") {\n@@ -596,7 +667,7 @@\n     HtmlImage(imageData$Link[i], imageData$Label[i])\n   }\n }\n-cata("<br/>\\n")\n+cata("<br />\\n")\n \n cata("<h4>Plots:</h4>\\n")\n for (i in 1:nrow(linkData)) {\n@@ -617,14 +688,80 @@\n cata("disk icon to download all files in a zip archive.</p>\\n")\n cata("<p>.tsv files are tab seperated files that can be viewed using Excel ")\n cata("or other spreadsheet programs</p>\\n")\n+\n+cata("<h4>Additional Information:</h4>\\n")\n+\n+if (inputType == "fastq") {\n+  ListItem("Data was gathered from fastq raw read file(s).")\n+} else if (inputType == "counts") {\n+  ListItem("Data was gathered from a table of counts.")\n+}\n+\n+if (cpmReq!=0 && sampleReq!=0) {\n+  tempStr <- paste("Hairpins that do not have more than", cpmReq,\n+                   "CPM in at least", sampleReq, "samples are considered",\n+                   "insignificant and filtered out.")\n+  ListItem(tempStr)\n+  filterProp <- round(filteredCount/preFilterCount*100, digits=2)\n+  tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,\n+                   "%) hairpins 
were filtered out for low count-per-million.")\n+  ListItem(tempStr)\n+}\n+\n+if (workMode == "classic") {\n+  ListItem("An exact test was performed on each hairpin.")\n+} else if (workMode == "glm") {\n+  ListItem("A generalised linear model was fitted to each hairpin.")\n+}\n+\n+\n+\n+cit <- character()\n+link <-character()\n+link[1] <- paste0("<a href=\\"",\n+                  "http://www.bioconductor.org/packages/release/bioc/",\n+                  "vignettes/limma/inst/doc/usersguide.pdf",\n+                  "\\">", "limma User\'s Guide", "</a>.")\n+link[2] <- paste0("<a href=\\"",\n+                  "http://www.bioconductor.org/packages/release/bioc/",\n+                  "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf",\n+                  "\\">", "edgeR User\'s Guide", "</a>")\n+                  \n+cit[1] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010).",\n+                "edgeR: a Bioconductor package for differential",\n+                "expression analysis of digital gene expression",\n+                "data. Bioinformatics 26, 139-140")\n+cit[2] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests",\n+                "for assessing differences in tag abundance. Bioinformatics",\n+                "23, 2881-2887")\n+cit[3] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of",\n+                "negative binomial dispersion, with applications to SAGE data.",\n+                "Biostatistics, 9, 321-332")\n+\n+cit[4] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential",\n+                "expression analysis of multifactor RNA-Seq experiments with",\n+                "respect to biological variation. 
Nucleic Acids Research 40,",\n+                "4288-4297")\n+\n+cata("<h4>Citations</h4>")\n+cata("<ol>\\n")\n+ListItem(cit[1])\n+ListItem(cit[2])\n+ListItem(cit[3])\n+ListItem(cit[4])\n+cata("</ol>\\n")\n+\n cata("<table border=\\"0\\">\\n")\n-\n cata("<tr>\\n")\n TableItem("Task started at:"); TableItem(timeStart)\n cata("</tr>\\n")\n cata("<tr>\\n")\n TableItem("Task ended at:"); TableItem(timeEnd)\n cata("</tr>\\n")\n+cata("<tr>\\n")\n+TableItem("Task run time:"); TableItem(timeTaken)\n+cata("<tr>\\n")\n+cata("</table>\\n")\n \n cata("</body>\\n")\n cata("</html>")\n'
b
diff -r 17befe9f8b03 -r 3d04308a99f9 hairpinTool.xml
--- a/hairpinTool.xml Mon Feb 24 14:50:08 2014 +1100
+++ b/hairpinTool.xml Fri Apr 11 17:17:15 2014 +1000
b
@@ -1,11 +1,11 @@
-<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.5">
+<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.7">
   <description>
     Analyse hairpin differential representation using edgeR
   </description>
     
   <requirements>
-    <requirement type="R-module">edgeR</requirement>
-    <requirement type="R-module">limma</requirement>
+    <requirement type="R-module" version="3.5.27">edgeR</requirement>
+    <requirement type="R-module" version="3.18.13">limma</requirement>
   </requirements>
   
   <stdio>
@@ -13,8 +13,8 @@
   </stdio>
   
   <command interpreter="Rscript">
-  hairpinTool.R $inputOpt.type
-                #if $inputOpt.type=="fastq":
+  hairpinTool.R $inputOpt.inputType
+                #if $inputOpt.inputType=="fastq":
                   #for $i, $fas in enumerate($inputOpt.fastq):
                     fastq::$fas.file
                   #end for
@@ -22,7 +22,7 @@
                   $inputOpt.hairpin
                   $inputOpt.samples
                     
-                  #if $inputOpt.positions.option=="yes":
+                  #if $inputOpt.positions.posOption=="yes":
                     $inputOpt.positions.barstart
                     $inputOpt.positions.barend
                     $inputOpt.positions.hpstart
@@ -35,12 +35,12 @@
                   #end if
                 #else:
                   $inputOpt.counts
-                  $inputOpt.anno
-                  "$inputOpt.factors"
+                  $inputOpt.hairpin
+                  $inputOpt.samples
                   0 0 0
                 #end if
           
-                #if $filterCPM.option=="yes":
+                #if $filterCPM.filtOption=="yes":
                   $filterCPM.cpmReq
                   $filterCPM.sampleReq
                 #else:
@@ -57,12 +57,12 @@
                 #if $workMode.mode=="classic":
                   "$workMode.pair1"
                   "$workMode.pair2"
-                #else:
+                #elif $workMode.mode=="glm":
                   "$workMode.contrast"
-                  $workMode.roast.option
-                  #if $workMode.roast.option=="yes":
+                  $workMode.roast.roastOption
+                  #if $workMode.roast.roastOption=="yes":
                     $workMode.roast.hairpinReq
-                    $workMode.roast.select.option
+                    $workMode.roast.select.selOption
                     "$workMode.roast.select.selection"
                   #else:
                     0
@@ -74,7 +74,7 @@
   
   <inputs>
     <conditional name="inputOpt">
-      <param name="type" type="select" label="Input File Type">
+      <param name="inputType" type="select" label="Input File Type">
         <option value="fastq">FastQ File</option>
         <option value="counts">Table of Counts</option>
       </param>
@@ -92,7 +92,7 @@
         </repeat>
           
         <conditional name="positions">
-          <param name="option" type="select" 
+          <param name="posOption" type="select" 
                  label="Specify Barcode and Hairpin Locations?"
                  help="Default Positions: Barcode: 1 to 5, Hairpin: 37 to 57.">
             <option value="no" selected="True">No</option>
@@ -118,15 +118,15 @@
       
       <when value="counts">
         <param name="counts" type="data" format="tabular" label="Counts Table"/>
-        <param name="anno" type="data" format="tabular" 
+        <param name="hairpin" type="data" format="tabular" 
                label="Hairpin Annotation"/>
-        <param name="factors" type="data" format="tabular"
+        <param name="samples" type="data" format="tabular"
                label="Sample Annotation"/> 
       </when>
     </conditional>
     
     <conditional name="filterCPM">
-      <param name="option" type="select" label="Filter Low CPM?"
+      <param name="filtOption" type="select" label="Filter Low CPM?"
        help="Ignore hairpins with very low representation when performing 
              analysis.">
         <option value="yes">Yes</option>
@@ -172,7 +172,7 @@
                      expression."/>
                
         <conditional name="roast">
-          <param name="option" type="select" 
+          <param name="roastOption" type="select" 
                  label="Perform Gene Level Analysis?"
                  help="Analyse LogFC tendencies for hairpins belonging
                        to the same gene.">
@@ -187,7 +187,7 @@
                          be analysed."/>
                          
             <conditional name="select">
-              <param name="option" type="select"
+              <param name="selOption" type="select"
                      label="Gene Selection Method">
                 <option value="rank">By p-value Rank</option>
                 <option value="geneID">By Gene Identifier</option>
@@ -232,7 +232,6 @@
   <outputs>
     <data format="html" name="outFile" label="shRNAseq Analysis"/>
   </outputs>
-  
   <help>
 .. class:: infomark