Mercurial > repos > mvdbeek > damid_deseq2_to_bedgraph
annotate damid_to_bedgraph.py @ 0:755cbe6825b5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
| author | mvdbeek | 
|---|---|
| date | Fri, 14 Dec 2018 06:27:41 -0500 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
0
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
1 from collections import OrderedDict | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
2 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
3 import click | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
4 import numpy as np | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
5 import pandas as pd | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
6 import traces | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
7 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
8 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
def order_index(df):
    """
    Split chr_start_stop in df index and order by chrom and start.

    The index labels of ``df`` are expected to look like
    ``<chr>_<start>_<stop>``. The label is split from the right, so a
    chromosome name that itself contains underscores (``chr1_random``)
    stays intact.

    Returns a new DataFrame holding the columns ``chr``, ``start`` and
    ``stop`` followed by the columns of ``df``, sorted by ``chr`` and
    ``start``, and indexed by ``start + 2``. The input ``df`` is not
    modified.
    """
    # Split at most twice from the right: only the last two fields are
    # coordinates, everything before them is the chromosome name.
    idx = df.index.str.rsplit('_', n=2)
    idx = pd.DataFrame.from_records(list(idx))

    idx.columns = ['chr', 'start', 'stop']
    idx = idx.astype(dtype={"chr": "object",
                            "start": "int32",
                            "stop": "int32"})
    coordinates = idx.sort_values(['chr', 'start'])
    # Give the values the same positional labels as ``idx`` on a copy, so
    # the caller's index is left untouched.
    values = df.reset_index(drop=True)
    # A left join keeps the (sorted) row order of ``coordinates``.
    df = coordinates.join(values)
    # index is center of GATC site
    df.index = df['start'] + 2
    return df
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
27 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
28 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
def interpolate_values(df, sampling_width=100):
    """
    Resample the ``log2FC`` column of each chromosome at a fixed spacing.

    For every distinct value in ``df['chr']`` the ``log2FC`` column of the
    matching rows is wrapped in a ``traces.TimeSeries`` and sampled every
    ``sampling_width`` units with ``interpolate='linear'``. Each sample is
    turned into an interval reaching ``int(sampling_width / 2)`` to either
    side of the sampled position.

    Returns a DataFrame with the columns ``chr``, ``start``, ``end`` and
    ``score``; the per-chromosome frames are concatenated as they are, so
    row labels restart for every chromosome. If ``df`` holds no
    chromosomes, an empty DataFrame with those columns is returned.
    """
    half_width = int(sampling_width / 2)
    result = []
    for chrom in df['chr'].unique():
        chrom_df = df[df['chr'] == chrom]
        time_series = traces.TimeSeries(chrom_df['log2FC'])
        # Column 0 of the sampled records is used as the position and
        # column 1 as the interpolated value.
        s = pd.DataFrame.from_records(time_series.sample(sampling_width, interpolate='linear'))
        # Calculate new start and end of interpolated region.
        # bedGraph starts are 0-based, so a window that would begin before
        # the chromosome is clamped to 0.
        start = (s[0] - half_width).clip(lower=0)
        end = s[0] + half_width
        result.append(pd.DataFrame(OrderedDict([('chr', chrom), ('start', start), ('end', end), ('score', s[1])])))
    if not result:
        # pd.concat refuses an empty list; hand back an empty table instead.
        return pd.DataFrame(columns=['chr', 'start', 'end', 'score'])
    return pd.concat(result)
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
41 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
42 | 
| 
 
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
 
mvdbeek 
parents:  
diff
changeset
 | 
# Command line entry point. INPUT_PATH is read as a headerless
# tab-separated table whose first column holds the fragment label and whose
# third column holds the log2 fold change; OUTPUT_PATH receives the
# tab-separated result without header or index.
@click.command()
@click.argument('input_path', type=click.Path(exists=True), required=True)
@click.argument('output_path', type=click.Path(exists=False), required=True)
@click.option('--resolution', help="Interpolate log2 fold change at this resolution (in basepairs)", default=50, type=click.IntRange(min=1))
def deseq2_to_bedgraph(input_path, output_path, resolution=50):
    """Convert deseq2 output on GATC fragments to bedgraph file with interpolated values."""
    df = pd.read_csv(input_path, sep='\t', header=None, index_col=0, usecols=[0, 2], names=['GATC', 'log2FC'])
    # Drop every row whose label contains a literal dot.
    # NOTE(review): presumably these are contigs/scaffolds with dotted
    # names -- confirm against the upstream fragment naming.
    df = df[~df.index.str.contains(r'\.')]
    df = order_index(df)
    r = interpolate_values(df, sampling_width=resolution)
    r.to_csv(output_path, sep='\t', header=False, index=False)


# Run the click command only when executed as a script, not on import.
if __name__ == '__main__':
    deseq2_to_bedgraph()
