Mercurial > repos > devteam > poisson2test
changeset 0:8cd5945559b8 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:26:06 -0500 |
parents | |
children | 5ff18ec88181 |
files | poisson2test.py poisson2test.xml test-data/poisson2test1.tabular test-data/poisson2test1_out.tabular test-data/poisson2test2.tabular test-data/poisson2test2_out.tabular tool_dependencies.xml |
diffstat | 7 files changed, 357 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/poisson2test.py Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,124 @@ +#!/usr/local/bin/python + +import sys +from math import * +from rpy import * + + +if ((len(sys.argv)-1) != 6): + print 'too few parameters' + print 'usage: inputfile, col1, col2, d-value(not 0), p-val correction method(0 or 1)' + sys.exit() + +try: + lines_arr = open(sys.argv[1]).readlines() +except IOError: + print'cannot open',sys.argv[1] + sys.exit() + +try: + i = int(sys.argv[2]) #first column to compare + j = int(sys.argv[3]) #second colum to compare + d = float(sys.argv[4]) #correction factor + k = int(sys.argv[5]) #p-val correction method + outfile = open(sys.argv[6],'w') # output data + + if (i>j): + print 'column order not correct col1 < col2' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() + + try: + a = 1 / d + assert k in [0,1] + except ZeroDivisionError: + print 'd cannot be 0' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() + except: + print ' p-val correction should be 0 or 1 (0 = "bonferroni", 1 = "fdr")' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() +except ValueError: + print 'parameters are not integers' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() + + +fsize = len(lines_arr) + +z1 = [] +z2 = [] +pz1 = [] +pz2 = [] +field = [] + +if d<1: # Z score calculation + for line in lines_arr: + line.strip() + field = line.split('\t') + + x = int(field[j-1]) #input column 2 + y = int(field[i-1]) #input column 1 + if y>x: + z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt((1/d)*(x+(3/8)))))/sqrt(1+(1/d)))) + else: + tmp_var1 = x + x = y + y = tmp_var1 + z1.append(float((y - (d*x))/sqrt(d*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt(d*(x+(3/8)))))/sqrt(1+d))) + +else: #d>1 Z score calculation + for line in lines_arr: + line.strip() + field = line.split('\t') + x = int(field[i-1]) #input column 1 + y = int(field[j-1]) #input column 2 + + if y>x: + z1.append(float((y - (d*x))/sqrt(d*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt(d*(x+(3/8)))))/sqrt(1+d))) + else: + tmp_var2 = x + x = y + y = tmp_var2 + z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt((1/d)*(x+(3/8)))))/sqrt(1+(1/d)))) + + + + + +# P-value caluculation for z1 and z2 +for p in z1: + + pz1.append(float(r.pnorm(-abs(float(p))))) + +for q in z2: + + pz2.append(float(r.pnorm(-abs(float(q))))) + +# P-value correction for pz1 and pz2 + +if k == 0: + corrz1 = r.p_adjust(pz1,"bonferroni",fsize) + corrz2 = r.p_adjust(pz2,"bonferroni",fsize) + + +else: + + corrz1 = r.p_adjust(pz1,"fdr",fsize) + corrz2 = r.p_adjust(pz2,"fdr",fsize) + + +#printing all columns +for n in range(fsize): + print >> outfile, "%s\t%4.3f\t%4.3f\t%8.6f\t%8.6f\t%8.6f\t%8.6f" %(lines_arr[n].strip(),z1[n],z2[n],pz1[n],pz2[n],corrz1[n],corrz2[n]) + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/poisson2test.xml Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,113 @@ +<tool id="poisson2test" name="Poisson two-sample test" version="1.0.0"> + <description></description> + <requirements> + <requirement type="package" version="1.0.0">taxonomy</requirement> + </requirements> + <command interpreter="python">poisson2test.py $input1 $input2 $input3 $input4 $input5 $output1 2>/dev/null </command> + <inputs> + <param name="input1" format="tabular" type="data" label="Input File"/> + <param name="input2" type="integer" size="5" value="2" label="First Column"/> + <param name="input3" type="integer" size="5" value="3" label="Second Column"/> + <param name="input4" type="float" size="5" value="1" label="D value"/> + <param name="input5" type="select" label="correction method"> + <option value="0">Bonferroni</option> + <option value="1">FDR</option> + </param> + </inputs> + <outputs> + <data format="tabular" name="output1" /> + </outputs> + <tests> + <test> + <param name="input1" value="poisson2test1.tabular" ftype="tabular"/> + <param name="input2" value="2" /> + <param name="input3" value="3" /> + <param name="input4" value="0.44" /> + <param name="input5" value="0" /> + <output name="output1" file="poisson2test1_out.tabular" /> + </test> + <test> + <param name="input1" value="poisson2test2.tabular" ftype="tabular"/> + <param name="input2" value="2" /> + <param name="input3" value="3" /> + <param name="input4" value="0.44" /> + <param name="input5" value="0" /> + <output name="output1" file="poisson2test2_out.tabular" /> + </test> + </tests> + <help> + +**What it does** + +Suppose you have metagenomic samples from two different locations and have classified the reads unique to various taxa. Now you want to test if the number of reads that fall in a particular taxon in location 1 is different from those that fall in the same taxon in location 2. +This utility performs this analysis. It assumes that the data comes from a Poisson process and calculates two Z scores (Z1 and Z2) based on the work by Shiue and Bain; 1982 (Z1) and Huffman; 1984 (Z2). + +----- + +**Z score formula** + +Equation 1: + +.. image:: ${static_path}/images/poisson2test_eqn1.png + + +Equation 2: + +.. image:: ${static_path}/images/poisson2test_eqn2.png + + +X = number of reads falling in a particular taxon in location 1 + +Y = number of reads falling in the same taxon in location 2 + +d = correction factor that accounts for biases in sample collection, DNA concentration, read numbers etc. between the two locations. + +Not only that, this utility also provides corresponding p-values and corrected p-values (using Bonferroni or False Discovery Rate (FDR)). It takes in an input file (a tab delimited file consisting of three or more columns (taxon/category, read counts in location 1, read counts in location 2)), columns to compare, d value and a correction method 0 (Bonferroni) or 1 (FDR). + +----- + +**Example** + +- Input File: phylum, read count in location-1, read count in location-2:: + + Annelida 36 2 + Apicomplexa 17 8 + Arthropoda 1964 928 + Ascomycota 436 49 + Basidiomycota 77 55 + +- Arguments to be supplied by the user:: + + col_i col_j d-value correction-method + + 2 3 0.44 Bonferroni + +- Output File: phylum, readcount1, readcount2, z1, z2, p1, p2, corrected p1, corrected p2:: + + Annelida 36 2 3.385 4.276 0.000356 0.000010 0.00463 0.00012 + Apicomplexa 17 8 -0.157 -0.156 0.437707 0.438103 1.00000 1.00000 + Arthropoda 1964 928 -1.790 -1.777 0.036755 0.037744 0.47782 0.49067 + Ascomycota 436 49 9.778 11.418 0.000000 0.000000 0.00000 0.00000 + Basidiomycota 77 55 -2.771 -2.659 0.002792 0.003916 0.03629 0.05091 + +----- + +**Note** + +- Input file should be Tab delimited +- i < j +- d cannot be 0 +- k = Bonferroni or FDR + +----- + +**References** + +- Shiue, W. and Bain, L. (1982). Experiment Size and Power Comparisons for Two-Sample Poisson Tests. Applied Statistics 31, 130-134. + +- Huffman, M. D. (1984). An Improved Approximate Two-Sample Poisson Test. Applied Statistics 33, 224-226. + + </help> +</tool> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/poisson2test1.tabular Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,5 @@ +Acinetobacter 37 7 +Acyrthosiphon 70 21 +aedes 61 13 +Aeromonas 169 0 +anopheles 145 97
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/poisson2test1_out.tabular Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,5 @@ +Acinetobacter 37 7 2.109 2.315 0.017468 0.010302 0.087342 0.051510 +Acyrthosiphon 70 21 1.549 1.612 0.060722 0.053481 0.303609 0.267406 +aedes 61 13 2.425 2.625 0.007645 0.004329 0.038223 0.021643 +Aeromonas 169 0 8.623 14.372 0.000000 0.000000 0.000000 0.000000 +anopheles 145 97 -3.217 -3.102 0.000647 0.000960 0.003234 0.004801
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/poisson2test2.tabular Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,52 @@ +Acyrthosiphon pisum 55 54 +Aedes aegypti 246 72 +Anopheles gambiae 337 168 +Apis mellifera 33 11 +Aspergillus niger 292 29 +Batrachochytrium dendrobatidis 17 14 +Bombyx mori 2 16 +Bos taurus 16 71 +Branchiostoma floridae 44 1 +Caenorhabditis briggsae 269 121 +Caenorhabditis remanei 35 29 +Chlamydomonas reinhardtii 85 13 +Citrus sinensis 14 2 +Culex pipiens 408 81 +Daphnia pulex 213 75 +Drosophila ananassae 3 20 +Drosophila grimshawi 14 10 +Drosophila pseudoobscura 28 22 +Drosophila willistoni 4 18 +Emiliania huxleyi 56 13 +Glycine max 4019 1831 +Helobdella robusta 33 1 +Homo sapiens 59 6 +Hyaloperonospora parasitica 48 10 +Hydra magnipapillata 9 65 +Medicago truncatula 62 42 +Mimulus guttatus 12 9 +Mus musculus 18 5 +Mycosphaerella fijiensis 42 7 +Nasonia vitripennis 10 12 +Nectria haematococca 67 2 +Oryza sativa 6068 2561 +Paramecium tetraurelia 749 296 +Pediculus humanus 49 40 +Phakopsora pachyrhizi 66 53 +Physcomitrella patens 304 36 +Phytophthora ramorum 174 56 +Phytophthora sojae 10 0 +Pinus taeda 0 26 +Populus balsamifera 43 4 +Pristionchus pacificus 4 14 +Rattus norvegicus 11 3 +Rhodnius prolixus 24 17 +Ricinus communis 75 14 +Schistosoma mansoni 80 15 +Schmidtea mediterranea 307 277 +Selaginella moellendorffii 27 10 +Sorghum bicolor 306 72 +Strongylocentrotus purpuratus 182 34 +Trypanosoma cruzi 23 0 +Volvox carteri 64 2 +Zea mays 583 263
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/poisson2test2_out.tabular Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,52 @@ +Acyrthosiphon pisum 55 54 -4.303 -4.049 0.000008 0.000026 0.000438 0.001340 +Aedes aegypti 246 72 3.064 3.198 0.001093 0.000693 0.056838 0.036029 +Anopheles gambiae 337 168 -1.323 -1.307 0.092930 0.095535 1.000000 1.000000 +Apis mellifera 33 11 0.800 0.823 0.211855 0.205213 1.000000 1.000000 +Aspergillus niger 292 29 8.371 9.916 0.000000 0.000000 0.000000 0.000000 +Batrachochytrium dendrobatidis 17 14 -1.765 -1.678 0.038749 0.046690 1.000000 1.000000 +Bombyx mori 2 16 5.373 5.103 0.000000 0.000000 0.000002 0.000009 +Bos taurus 16 71 10.338 9.621 0.000000 0.000000 0.000000 0.000000 +Branchiostoma floridae 44 1 4.126 5.667 0.000018 0.000000 0.000959 0.000000 +Caenorhabditis briggsae 269 121 -0.202 -0.201 0.420141 0.420309 1.000000 1.000000 +Caenorhabditis remanei 35 29 -2.563 -2.435 0.005191 0.007450 0.269927 0.387398 +Chlamydomonas reinhardtii 85 13 3.716 4.183 0.000101 0.000014 0.005267 0.000747 +Citrus sinensis 14 2 1.568 1.780 0.058457 0.037576 1.000000 1.000000 +Culex pipiens 408 81 6.717 7.331 0.000000 0.000000 0.000000 0.000000 +Daphnia pulex 213 75 1.663 1.701 0.048160 0.044463 1.000000 1.000000 +Drosophila ananassae 3 20 5.872 5.539 0.000000 0.000000 0.000000 0.000001 +Drosophila grimshawi 14 10 -1.182 -1.134 0.118667 0.128417 1.000000 1.000000 +Drosophila pseudoobscura 28 22 -2.064 -1.967 0.019519 0.024570 1.000000 1.000000 +Drosophila willistoni 4 18 5.220 4.860 0.000000 0.000001 0.000005 0.000031 +Emiliania huxleyi 56 13 2.113 2.264 0.017321 0.011791 0.900675 0.613145 +Glycine max 4019 1831 -1.235 -1.231 0.108478 0.109251 1.000000 1.000000 +Helobdella robusta 33 1 3.496 4.684 0.000237 0.000001 0.012302 0.000073 +Homo sapiens 59 6 3.732 4.409 0.000095 0.000005 0.004933 0.000270 +Hyaloperonospora parasitica 48 10 2.201 2.389 0.013860 0.008448 0.720722 0.439307 +Hydra magnipapillata 9 65 10.697 10.120 0.000000 0.000000 0.000000 0.000000 +Medicago truncatula 62 42 -2.176 -2.096 0.014777 0.018033 0.768379 0.937696 +Mimulus guttatus 12 9 -1.224 -1.170 0.110516 0.120942 1.000000 1.000000 +Mus musculus 18 5 0.918 0.964 0.179337 0.167614 1.000000 1.000000 +Mycosphaerella fijiensis 42 7 2.472 2.755 0.006711 0.002933 0.348951 0.152533 +Nasonia vitripennis 10 12 2.443 2.277 0.007288 0.011379 0.378990 0.591708 +Nectria haematococca 67 2 4.987 6.692 0.000000 0.000000 0.000016 0.000000 +Oryza sativa 6068 2561 1.768 1.775 0.038558 0.037957 1.000000 1.000000 +Paramecium tetraurelia 749 296 1.565 1.582 0.058782 0.056837 1.000000 1.000000 +Pediculus humanus 49 40 -2.947 -2.802 0.001606 0.002538 0.083501 0.131991 +Phakopsora pachyrhizi 66 53 -3.311 -3.152 0.000464 0.000811 0.024152 0.042153 +Physcomitrella patens 304 36 7.993 9.276 0.000000 0.000000 0.000000 0.000000 +Phytophthora ramorum 174 56 2.044 2.111 0.020488 0.017390 1.000000 0.904295 +Phytophthora sojae 10 0 2.098 3.496 0.017969 0.000236 0.934412 0.012278 +Pinus taeda 0 26 7.687 8.498 0.000000 0.000000 0.000000 0.000000 +Populus balsamifera 43 4 3.281 3.916 0.000517 0.000045 0.026903 0.002339 +Pristionchus pacificus 4 14 4.349 4.025 0.000007 0.000028 0.000355 0.001481 +Rattus norvegicus 11 3 0.741 0.780 0.229238 0.217720 1.000000 1.000000 +Rhodnius prolixus 24 17 -1.516 -1.456 0.064729 0.072722 1.000000 1.000000 +Ricinus communis 75 14 3.036 3.338 0.001198 0.000422 0.062288 0.021926 +Schistosoma mansoni 80 15 3.124 3.433 0.000891 0.000298 0.046328 0.015504 +Schmidtea mediterranea 307 277 -8.853 -8.368 0.000000 0.000000 0.000000 0.000000 +Selaginella moellendorffii 27 10 0.466 0.474 0.320629 0.317714 1.000000 1.000000 +Sorghum bicolor 306 72 4.857 5.197 0.000001 0.000000 0.000031 0.000005 +Strongylocentrotus purpuratus 182 34 4.727 5.196 0.000001 0.000000 0.000059 0.000005 +Trypanosoma cruzi 23 0 3.181 5.302 0.000733 0.000000 0.038134 0.000003 +Volvox carteri 64 2 4.854 6.487 0.000001 0.000000 0.000031 0.000000 +Zea mays 583 263 -0.336 -0.335 0.368487 0.368792 1.000000 1.000000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jan 27 09:26:06 2014 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="taxonomy" version="1.0.0"> + <repository changeset_revision="c011d4659306" name="package_taxonomy_1_0_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>