diff fisher_test.xml @ 0:8cb991523c5a draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fisher_test commit f9c229fc39114e80cf01383d7240e4971b2e3c53
author drosofff
date Thu, 09 Mar 2017 12:31:03 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fisher_test.xml	Thu Mar 09 12:31:03 2017 -0500
@@ -0,0 +1,75 @@
+<tool id="fishertest" name="Fisher's exact test" version="0.9.0">
+	<description>on two gene hit lists</description>
+        <requirements>
+            <requirement type="package" version="3.1.2">R</requirement>
+            <requirement type="package" version="2.14">biocbasics</requirement>
+            <requirement type="package" version="2.4.2=r3.3.1_0">bioconductor-qvalue</requirement>
+        </requirements>
+        <command><![CDATA[
+            Rscript '$fisher_test' "\${GALAXY_SLOTS:-1}"
+    ]]></command>
+  <configfiles>
+    <configfile name="fisher_test">
+    <![CDATA[
+      ## Setup R error handling to go to stderr
+      options( show.error.messages=F,  error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+      options(warn=-1)
+      suppressMessages(library(qvalue))
+      library(parallel)
+      args = commandArgs(trailingOnly = TRUE)
+      slots = as.numeric(args[1])
+      countsTable = read.delim("${input}", header=TRUE, check.names=FALSE, stringsAsFactor = TRUE)
+      depth1 = sum(countsTable[,2])
+      depth2 = sum(countsTable[,3])
+      float_table=data.frame(countsTable[,2], countsTable[,3])
+
+      calc_pvalue <- function(row, depth1, depth2, ... ){
+        thearray = array( c(row, (depth1 - row[1]), (depth2 - row[2])), dim=c(2,2))
+        current_test = fisher.test( thearray )
+        return(current_test\$p.value)
+      }
+
+      cl <- makePSOCKcluster(slots)
+      clusterExport(cl=cl, varlist=c("calc_pvalue", "depth1", "depth2"))
+      ptm <- proc.time()
+      p_val = parApply(cl, float_table, 1, function(x) calc_pvalue(x, depth1, depth2))
+      stopCluster(cl)
+      proc.time() - ptm
+      p_val[p_val>1]=1
+      p = qvalue(p_val)
+      finalTable = cbind(countsTable, data.frame(p\$pvalues), data.frame(p\$qvalues))
+      write.table ( finalTable, file = "${output}", row.names=FALSE, col.names=TRUE, quote= FALSE, dec = ".", sep = "\t", eol = "\n")
+    ]]>
+    </configfile>
+  </configfiles>
+        <inputs>
+                <param name="input" type="data" format="tabular" label="gene hit lists, 2 samples"/>
+        </inputs>
+        <outputs>
+                <data name="output" format="tabular" label="Fisher test p-values" />
+        </outputs>
+<tests>
+    <test>
+        <param name="input" value="counts.tab" ftype="tabular"/>
+        <output name="output" file="fisher.tab" ftype="tabular"/>
+    </test>
+</tests>
+<help>
+
+**What it does**
+
+Runs Fisher's exact test for testing the null of independence of rows and columns in a contingency table of two columns.
+
+p.pvalues: the chance of getting this data if it is independent between columns (false negative); the p-value.
+
+q.qvalues: FDR (Faslse Detection Rate) adjusted p-values; a q-value of 0.05 implies that 5% of significant tests will result in false positives.
+
+Be aware that this test does not take into account the biological noise that would be visible if replicates were available.
+
+
+</help>
+<citations>
+    <citation type="doi">10.1111/1467-9868.00346</citation>
+</citations>
+    
+</tool>