comparison heinz_scoring.py @ 0:e41ec5af7472 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/heinz commit b0b2c64a46bdd9beebdfb7fc5312f75346483763
author iuc
date Thu, 02 Aug 2018 11:57:44 -0400
parents
children 5f589c91566e
comparison
equal deleted inserted replaced
-1:000000000000 0:e41ec5af7472
1 #!/usr/bin/env python
2 """Calculate scores for Heinz.
3
4 This script transform a p-value into a score:
5 1. Use alpha and lambda to calculate a threshold P-value.
6 2. Calculate a score based on each P-value by alpha and the threshold.
7
8 For more details, please refer to the paper doi:10.1093/bioinformatics/btn161
9
10 Input:
11 P-values from DESeq2 result: first column: names, second column P-values
12 Output:
13 Scores, which will be used as the input of Heinz.
14 First column: names, second column: scores.
15
16 Python 3 is required.
17 """
18 # Implemented by: Chao (Cico) Zhang
19 # Homepage: https://Hi-IT.org
20 # Date: 14 Mar 2017
21 # Last modified: 23 May 2018
22
23 import argparse
24 import sys
25
26 import numpy as np
27 import pandas as pd
28
29
30 parser = argparse.ArgumentParser(description='Transform a P-value into a '
31 'score which can be used as the input of '
32 'Heinz')
33 parser.add_argument('-n', '--node', required=True, dest='nodes',
34 metavar='nodes_pvalue.txt', type=str,
35 help='Input file of nodes with P-values')
36 parser.add_argument('-f', '--fdr', required=True, dest='fdr',
37 metavar='0.007', type=float, help='Choose a value of FDR')
38 parser.add_argument('-m', '--model', required=False, dest='param_file',
39 metavar='param.txt', type=str,
40 help='A txt file contains model params as input')
41 parser.add_argument('-a', '--alpha', required=False, dest='alpha',
42 metavar='0.234', type=float, default=0.5,
43 help='Single parameter alpha as input if txt input is '
44 'not provided')
45 parser.add_argument('-l', '--lambda', required=False, dest='lam',
46 metavar='0.345', type=float, default=0.5,
47 help='Single parameter lambda as input if txt input is '
48 'not provided')
49 parser.add_argument('-o', '--output', required=True, dest='output',
50 metavar='scores.txt', type=str,
51 help='The output file to store the calculated scores')
52 args = parser.parse_args()
53
54 # Check if the parameters are complete
55 if args.output is None:
56 sys.exit('Output file is not designated.')
57
58 if args.nodes is None:
59 sys.exit('Nodes with p-values must be provided.')
60
61 if args.fdr is None:
62 sys.exit('FDR must be provided')
63
64 if args.fdr >= 1 or args.fdr <= 0:
65 sys.exit('FDR must greater than 0 and smaller than 1')
66
67 # run heinz-print according to the input type
68 if args.param_file is not None: # if BUM output is provided
69 with open(args.param_file) as p:
70 params = p.readlines()
71 lam = float(params[0]) # Maybe this is a bug
72 alpha = float(params[1]) # Maybe this is a bug
73 # if BUM output is not provided
74 elif args.alpha is not None and args.lam is not None:
75 lam = args.lam
76 alpha = args.alpha
77 else: # The input is not complete
78 sys.exit('The parameters of the model are incomplete.')
79
80 # Calculate the threshold P-value
81 pie = lam + (1 - lam) * alpha
82 p_threshold = np.power((pie - lam * args.fdr) / (args.fdr - lam * args.fdr),
83 1 / (alpha - 1))
84 print(p_threshold)
85 # Calculate the scores
86 input_pvalues = pd.read_csv(args.nodes, sep='\t', names=['node', 'pvalue'])
87 input_pvalues.loc[:, 'score'] = input_pvalues.pvalue.apply(lambda x:
88 (alpha - 1) *
89 (np.log(x) -
90 np.log(
91 p_threshold)))
92 # print(input_pvalues.loc[:, ['node', 'score']])
93 input_pvalues.loc[:, ['node', 'score']].to_csv(args.output, sep='\t',
94 index=False, header=False)